#include <linux/blkdev.h>
#include <linux/sysctl.h>
#include <linux/seq_file.h>
-#include <linux/mutex.h>
-#include <linux/buffer_head.h> /* for invalidate_bdev */
+#include <linux/fs.h>
#include <linux/poll.h>
#include <linux/ctype.h>
#include <linux/string.h>
#include <linux/hdreg.h>
#include <linux/proc_fs.h>
#include <linux/random.h>
+#include <linux/module.h>
#include <linux/reboot.h>
#include <linux/file.h>
#include <linux/compat.h>
#include "md.h"
#include "bitmap.h"
-#define DEBUG 0
-#define dprintk(x...) ((void)(DEBUG && printk(x)))
-
#ifndef MODULE
static void autostart_arrays(int part);
#endif
+/* pers_list is a list of registered personalities protected
+ * by pers_lock.
+ * pers_lock also protects accesses to mddev->thread
+ * when the reconfig mutex cannot be held.
+ */
static LIST_HEAD(pers_list);
static DEFINE_SPINLOCK(pers_lock);
static int sysctl_speed_limit_min = 1000;
static int sysctl_speed_limit_max = 200000;
-static inline int speed_min(mddev_t *mddev)
+static inline int speed_min(struct mddev *mddev)
{
return mddev->sync_speed_min ?
mddev->sync_speed_min : sysctl_speed_limit_min;
}
-static inline int speed_max(mddev_t *mddev)
+static inline int speed_max(struct mddev *mddev)
{
return mddev->sync_speed_max ?
mddev->sync_speed_max : sysctl_speed_limit_max;
static void mddev_bio_destructor(struct bio *bio)
{
- mddev_t *mddev, **mddevp;
+ struct mddev *mddev, **mddevp;
mddevp = (void*)bio;
mddev = mddevp[-1];
}
struct bio *bio_alloc_mddev(gfp_t gfp_mask, int nr_iovecs,
- mddev_t *mddev)
+ struct mddev *mddev)
{
struct bio *b;
- mddev_t **mddevp;
+ struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_alloc(gfp_mask, nr_iovecs);
EXPORT_SYMBOL_GPL(bio_alloc_mddev);
struct bio *bio_clone_mddev(struct bio *bio, gfp_t gfp_mask,
- mddev_t *mddev)
+ struct mddev *mddev)
{
struct bio *b;
- mddev_t **mddevp;
+ struct mddev **mddevp;
if (!mddev || !mddev->bio_set)
return bio_clone(bio, gfp_mask);
}
EXPORT_SYMBOL_GPL(bio_clone_mddev);
+void md_trim_bio(struct bio *bio, int offset, int size)
+{
+ /* 'bio' is a cloned bio which we need to trim to match
+ * the given offset and size.
+ * This requires adjusting bi_sector, bi_size, and bi_io_vec
+ */
+ int i;
+ struct bio_vec *bvec;
+ int sofar = 0;
+
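+ /* offset and size arrive in 512-byte sectors; bi_size and bv_len
+ * are byte counts, hence the <<9 conversions below
+ */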
+ size <<= 9;
+ if (offset == 0 && size == bio->bi_size)
+ return;
+
+ bio->bi_sector += offset;
+ bio->bi_size = size;
+ offset <<= 9;
+ clear_bit(BIO_SEG_VALID, &bio->bi_flags);
+
+ while (bio->bi_idx < bio->bi_vcnt &&
+ bio->bi_io_vec[bio->bi_idx].bv_len <= offset) {
+ /* remove this whole bio_vec */
+ offset -= bio->bi_io_vec[bio->bi_idx].bv_len;
+ bio->bi_idx++;
+ }
+ if (bio->bi_idx < bio->bi_vcnt) {
+ bio->bi_io_vec[bio->bi_idx].bv_offset += offset;
+ bio->bi_io_vec[bio->bi_idx].bv_len -= offset;
+ }
+ /* avoid any complications with bi_idx being non-zero */
+ if (bio->bi_idx) {
+ memmove(bio->bi_io_vec, bio->bi_io_vec+bio->bi_idx,
+ (bio->bi_vcnt - bio->bi_idx) * sizeof(struct bio_vec));
+ bio->bi_vcnt -= bio->bi_idx;
+ bio->bi_idx = 0;
+ }
+ /* Make sure vcnt and last bv are not too big */
+ bio_for_each_segment(bvec, bio, i) {
+ if (sofar + bvec->bv_len > size)
+ bvec->bv_len = size - sofar;
+ if (bvec->bv_len == 0) {
+ bio->bi_vcnt = i;
+ break;
+ }
+ sofar += bvec->bv_len;
+ }
+}
+EXPORT_SYMBOL_GPL(md_trim_bio);
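+/* Usage sketch (hypothetical caller): for a clone covering eight
+ * sectors, md_trim_bio(clone, 2, 4) keeps sectors 2-5 of the original
+ * range: bi_sector advances by 2 and bi_size becomes 4 sectors
+ * (2048 bytes).
+ */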
+
/*
* We have a system wide 'event count' that is incremented
* on any 'interesting' event, and readers of /proc/mdstat
*/
static DECLARE_WAIT_QUEUE_HEAD(md_event_waiters);
static atomic_t md_event_count;
-void md_new_event(mddev_t *mddev)
+void md_new_event(struct mddev *mddev)
{
atomic_inc(&md_event_count);
wake_up(&md_event_waiters);
/* Alternate version that can be called from interrupts
* when calling sysfs_notify isn't needed.
*/
-static void md_new_event_inintr(mddev_t *mddev)
+static void md_new_event_inintr(struct mddev *mddev)
{
atomic_inc(&md_event_count);
wake_up(&md_event_waiters);
* Any code which breaks out of this loop while owning
* a reference to the current mddev must mddev_put it.
*/
-#define for_each_mddev(mddev,tmp) \
+#define for_each_mddev(_mddev,_tmp) \
\
for (({ spin_lock(&all_mddevs_lock); \
- tmp = all_mddevs.next; \
- mddev = NULL;}); \
- ({ if (tmp != &all_mddevs) \
- mddev_get(list_entry(tmp, mddev_t, all_mddevs));\
+ _tmp = all_mddevs.next; \
+ _mddev = NULL;}); \
+ ({ if (_tmp != &all_mddevs) \
+ mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\
spin_unlock(&all_mddevs_lock); \
- if (mddev) mddev_put(mddev); \
- mddev = list_entry(tmp, mddev_t, all_mddevs); \
- tmp != &all_mddevs;}); \
+ if (_mddev) mddev_put(_mddev); \
+ _mddev = list_entry(_tmp, struct mddev, all_mddevs); \
+ _tmp != &all_mddevs;}); \
({ spin_lock(&all_mddevs_lock); \
- tmp = tmp->next;}) \
+ _tmp = _tmp->next;}) \
)
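+/* Typical use (see rdev_size_store below), sketched:
+ *	struct mddev *mddev;
+ *	struct list_head *tmp;
+ *	for_each_mddev(mddev, tmp) {
+ *		...
+ *	}
+ * The body runs without all_mddevs_lock held but with a reference on
+ * the current mddev; code that breaks out early must mddev_put it.
+ */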
* call has finished, the bio has been linked into some internal structure
* and so is visible to ->quiesce(), so we don't need the refcount any more.
*/
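+/* The block layer's make_request hooks now return void: the bio is
+ * always consumed (or failed) here rather than handed back via a
+ * return value, so the old rv plumbing below is dropped.
+ */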
-static int md_make_request(struct request_queue *q, struct bio *bio)
+static void md_make_request(struct request_queue *q, struct bio *bio)
{
const int rw = bio_data_dir(bio);
- mddev_t *mddev = q->queuedata;
- int rv;
+ struct mddev *mddev = q->queuedata;
int cpu;
unsigned int sectors;
if (mddev == NULL || mddev->pers == NULL
|| !mddev->ready) {
bio_io_error(bio);
- return 0;
+ return;
}
smp_rmb(); /* Ensure implications of 'active' are visible */
rcu_read_lock();
* go away inside make_request
*/
sectors = bio_sectors(bio);
- rv = mddev->pers->make_request(mddev, bio);
+ mddev->pers->make_request(mddev, bio);
cpu = part_stat_lock();
part_stat_inc(cpu, &mddev->gendisk->part0, ios[rw]);
if (atomic_dec_and_test(&mddev->active_io) && mddev->suspended)
wake_up(&mddev->sb_wait);
-
- return rv;
}
/* mddev_suspend makes sure no new requests are submitted
* Once ->stop is called and completes, the module will be completely
* unused.
*/
-void mddev_suspend(mddev_t *mddev)
+void mddev_suspend(struct mddev *mddev)
{
BUG_ON(mddev->suspended);
mddev->suspended = 1;
synchronize_rcu();
wait_event(mddev->sb_wait, atomic_read(&mddev->active_io) == 0);
mddev->pers->quiesce(mddev, 1);
+
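+ /* make sure the safemode timer cannot fire while the array is
+ * suspended; its handler would wake the md thread
+ */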
+ del_timer_sync(&mddev->safemode_timer);
}
EXPORT_SYMBOL_GPL(mddev_suspend);
-void mddev_resume(mddev_t *mddev)
+void mddev_resume(struct mddev *mddev)
{
mddev->suspended = 0;
wake_up(&mddev->sb_wait);
}
EXPORT_SYMBOL_GPL(mddev_resume);
-int mddev_congested(mddev_t *mddev, int bits)
+int mddev_congested(struct mddev *mddev, int bits)
{
return mddev->suspended;
}
static void md_end_flush(struct bio *bio, int err)
{
- mdk_rdev_t *rdev = bio->bi_private;
- mddev_t *mddev = rdev->mddev;
+ struct md_rdev *rdev = bio->bi_private;
+ struct mddev *mddev = rdev->mddev;
rdev_dec_pending(rdev, mddev);
static void submit_flushes(struct work_struct *ws)
{
- mddev_t *mddev = container_of(ws, mddev_t, flush_work);
- mdk_rdev_t *rdev;
+ struct mddev *mddev = container_of(ws, struct mddev, flush_work);
+ struct md_rdev *rdev;
INIT_WORK(&mddev->flush_work, md_submit_flush_data);
atomic_set(&mddev->flush_pending, 1);
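+ /* flush_pending starts at one; this issuing reference is dropped
+ * only after every per-rdev flush bio has been submitted, so the
+ * completion work cannot be queued early
+ */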
rcu_read_lock();
- list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags)) {
/* Take two references, one is dropped
static void md_submit_flush_data(struct work_struct *ws)
{
- mddev_t *mddev = container_of(ws, mddev_t, flush_work);
+ struct mddev *mddev = container_of(ws, struct mddev, flush_work);
struct bio *bio = mddev->flush_bio;
if (bio->bi_size == 0)
bio_endio(bio, 0);
else {
bio->bi_rw &= ~REQ_FLUSH;
- if (mddev->pers->make_request(mddev, bio))
- generic_make_request(bio);
+ mddev->pers->make_request(mddev, bio);
}
mddev->flush_bio = NULL;
wake_up(&mddev->sb_wait);
}
-void md_flush_request(mddev_t *mddev, struct bio *bio)
+void md_flush_request(struct mddev *mddev, struct bio *bio)
{
spin_lock_irq(&mddev->write_lock);
wait_event_lock_irq(mddev->sb_wait,
*/
struct md_plug_cb {
struct blk_plug_cb cb;
- mddev_t *mddev;
+ struct mddev *mddev;
};
static void plugger_unplug(struct blk_plug_cb *cb)
/* Check that an unplug wakeup will come shortly.
* If not, wake up the md thread immediately
*/
-int mddev_check_plugged(mddev_t *mddev)
+int mddev_check_plugged(struct mddev *mddev)
{
struct blk_plug *plug = current->plug;
struct md_plug_cb *mdcb;
}
EXPORT_SYMBOL_GPL(mddev_check_plugged);
-static inline mddev_t *mddev_get(mddev_t *mddev)
+static inline struct mddev *mddev_get(struct mddev *mddev)
{
atomic_inc(&mddev->active);
return mddev;
static void mddev_delayed_delete(struct work_struct *ws);
-static void mddev_put(mddev_t *mddev)
+static void mddev_put(struct mddev *mddev)
{
struct bio_set *bs = NULL;
mddev->ctime == 0 && !mddev->hold_active) {
/* Array is not configured at all, and not held active,
* so destroy it */
- list_del(&mddev->all_mddevs);
+ list_del_init(&mddev->all_mddevs);
bs = mddev->bio_set;
mddev->bio_set = NULL;
if (mddev->gendisk) {
bioset_free(bs);
}
-void mddev_init(mddev_t *mddev)
+void mddev_init(struct mddev *mddev)
{
mutex_init(&mddev->open_mutex);
mutex_init(&mddev->reconfig_mutex);
}
EXPORT_SYMBOL_GPL(mddev_init);
-static mddev_t * mddev_find(dev_t unit)
+static struct mddev * mddev_find(dev_t unit)
{
- mddev_t *mddev, *new = NULL;
+ struct mddev *mddev, *new = NULL;
if (unit && MAJOR(unit) != MD_MAJOR)
unit &= ~((1<<MdpMinorShift)-1);
goto retry;
}
-static inline int mddev_lock(mddev_t * mddev)
+static inline int mddev_lock(struct mddev * mddev)
{
return mutex_lock_interruptible(&mddev->reconfig_mutex);
}
-static inline int mddev_is_locked(mddev_t *mddev)
+static inline int mddev_is_locked(struct mddev *mddev)
{
return mutex_is_locked(&mddev->reconfig_mutex);
}
-static inline int mddev_trylock(mddev_t * mddev)
+static inline int mddev_trylock(struct mddev * mddev)
{
return mutex_trylock(&mddev->reconfig_mutex);
}
static struct attribute_group md_redundancy_group;
-static void mddev_unlock(mddev_t * mddev)
+static void mddev_unlock(struct mddev * mddev)
{
if (mddev->to_remove) {
/* These cannot be removed under reconfig_mutex as
} else
mutex_unlock(&mddev->reconfig_mutex);
+ /* As we've dropped the mutex we need a spinlock to
+ * make sure the thread doesn't disappear
+ */
+ spin_lock(&pers_lock);
md_wakeup_thread(mddev->thread);
+ spin_unlock(&pers_lock);
}
-static mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
+static struct md_rdev * find_rdev_nr(struct mddev *mddev, int nr)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
if (rdev->desc_nr == nr)
return rdev;
return NULL;
}
-static mdk_rdev_t * find_rdev(mddev_t * mddev, dev_t dev)
+static struct md_rdev * find_rdev(struct mddev * mddev, dev_t dev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
if (rdev->bdev->bd_dev == dev)
return rdev;
return NULL;
}
-static struct mdk_personality *find_pers(int level, char *clevel)
+static struct md_personality *find_pers(int level, char *clevel)
{
- struct mdk_personality *pers;
+ struct md_personality *pers;
list_for_each_entry(pers, &pers_list, list) {
if (level != LEVEL_NONE && pers->level == level)
return pers;
}
/* return the offset of the super block in 512-byte sectors */
-static inline sector_t calc_dev_sboffset(mdk_rdev_t *rdev)
+static inline sector_t calc_dev_sboffset(struct md_rdev *rdev)
{
sector_t num_sectors = i_size_read(rdev->bdev->bd_inode) / 512;
return MD_NEW_SIZE_SECTORS(num_sectors);
}
-static int alloc_disk_sb(mdk_rdev_t * rdev)
+static int alloc_disk_sb(struct md_rdev * rdev)
{
if (rdev->sb_page)
MD_BUG();
return 0;
}
-static void free_disk_sb(mdk_rdev_t * rdev)
+static void free_disk_sb(struct md_rdev * rdev)
{
if (rdev->sb_page) {
put_page(rdev->sb_page);
rdev->sb_start = 0;
rdev->sectors = 0;
}
+ if (rdev->bb_page) {
+ put_page(rdev->bb_page);
+ rdev->bb_page = NULL;
+ }
}
static void super_written(struct bio *bio, int error)
{
- mdk_rdev_t *rdev = bio->bi_private;
- mddev_t *mddev = rdev->mddev;
+ struct md_rdev *rdev = bio->bi_private;
+ struct mddev *mddev = rdev->mddev;
if (error || !test_bit(BIO_UPTODATE, &bio->bi_flags)) {
printk("md: super_written gets error=%d, uptodate=%d\n",
bio_put(bio);
}
-void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev,
+void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
sector_t sector, int size, struct page *page)
{
/* write first size bytes of page to sector of rdev
bio->bi_end_io = super_written;
atomic_inc(&mddev->pending_writes);
- submit_bio(REQ_WRITE | REQ_SYNC | REQ_FLUSH | REQ_FUA, bio);
+ submit_bio(WRITE_FLUSH_FUA, bio);
}
-void md_super_wait(mddev_t *mddev)
+void md_super_wait(struct mddev *mddev)
{
/* wait for all superblock writes that were scheduled to complete */
DEFINE_WAIT(wq);
complete((struct completion*)bio->bi_private);
}
-int sync_page_io(mdk_rdev_t *rdev, sector_t sector, int size,
+int sync_page_io(struct md_rdev *rdev, sector_t sector, int size,
struct page *page, int rw, bool metadata_op)
{
struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, rdev->mddev);
}
EXPORT_SYMBOL_GPL(sync_page_io);
-static int read_disk_sb(mdk_rdev_t * rdev, int size)
+static int read_disk_sb(struct md_rdev * rdev, int size)
{
char b[BDEVNAME_SIZE];
if (!rdev->sb_page) {
* We rely on user-space to write the initial superblock, and support
* reading and updating of superblocks.
* Interface methods are:
- * int load_super(mdk_rdev_t *dev, mdk_rdev_t *refdev, int minor_version)
+ * int load_super(struct md_rdev *dev, struct md_rdev *refdev, int minor_version)
* loads and validates a superblock on dev.
* if refdev != NULL, compare superblocks on both devices
* Return:
* -EINVAL superblock incompatible or invalid
* -othererror e.g. -EIO
*
- * int validate_super(mddev_t *mddev, mdk_rdev_t *dev)
+ * int validate_super(struct mddev *mddev, struct md_rdev *dev)
* Verify that dev is acceptable into mddev.
* The first time, mddev->raid_disks will be 0, and data from
* dev should be merged in. Subsequent calls check that dev
* is new enough. Return 0 or -EINVAL
*
- * void sync_super(mddev_t *mddev, mdk_rdev_t *dev)
+ * void sync_super(struct mddev *mddev, struct md_rdev *dev)
* Update the superblock for rdev with data in mddev
* This does not write to disc.
*
struct super_type {
char *name;
struct module *owner;
- int (*load_super)(mdk_rdev_t *rdev, mdk_rdev_t *refdev,
+ int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev,
int minor_version);
- int (*validate_super)(mddev_t *mddev, mdk_rdev_t *rdev);
- void (*sync_super)(mddev_t *mddev, mdk_rdev_t *rdev);
- unsigned long long (*rdev_size_change)(mdk_rdev_t *rdev,
+ int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev);
+ void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev);
+ unsigned long long (*rdev_size_change)(struct md_rdev *rdev,
sector_t num_sectors);
};
* has a bitmap. Otherwise, it returns 0.
*
*/
-int md_check_no_bitmap(mddev_t *mddev)
+int md_check_no_bitmap(struct mddev *mddev)
{
if (!mddev->bitmap_info.file && !mddev->bitmap_info.offset)
return 0;
/*
* load_super for 0.90.0
*/
-static int super_90_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
+static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
mdp_super_t *sb;
ret = -EINVAL;
bdevname(rdev->bdev, b);
- sb = (mdp_super_t*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
if (sb->md_magic != MD_SB_MAGIC) {
printk(KERN_ERR "md: invalid raid superblock magic on %s\n",
rdev->preferred_minor = sb->md_minor;
rdev->data_offset = 0;
rdev->sb_size = MD_SB_BYTES;
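+ /* v0.90 metadata has no bad-block log; shift == -1 disables it,
+ * matching the convention used in md_import_device
+ */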
+ rdev->badblocks.shift = -1;
if (sb->level == LEVEL_MULTIPATH)
rdev->desc_nr = -1;
ret = 1;
} else {
__u64 ev1, ev2;
- mdp_super_t *refsb = (mdp_super_t*)page_address(refdev->sb_page);
+ mdp_super_t *refsb = page_address(refdev->sb_page);
if (!uuid_equal(refsb, sb)) {
printk(KERN_WARNING "md: %s has different UUID to %s\n",
b, bdevname(refdev->bdev,b2));
ret = 0;
}
rdev->sectors = rdev->sb_start;
+ /* Limit to 4TB as metadata cannot record more than that */
+ if (rdev->sectors >= (2ULL << 32))
+ rdev->sectors = (2ULL << 32) - 2;
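+ /* sb->size is a 32-bit count of KB: widen it before doubling so
+ * devices between 2TB and 4TB don't overflow the comparison
+ */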
- if (rdev->sectors < sb->size * 2 && sb->level > 1)
+ if (rdev->sectors < ((sector_t)sb->size) * 2 && sb->level >= 1)
/* "this cannot possibly happen" ... */
ret = -EINVAL;
/*
* validate_super for 0.90.0
*/
-static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev)
{
mdp_disk_t *desc;
- mdp_super_t *sb = (mdp_super_t *)page_address(rdev->sb_page);
+ mdp_super_t *sb = page_address(rdev->sb_page);
__u64 ev1 = md_event(sb);
rdev->raid_disk = -1;
mddev->clevel[0] = 0;
mddev->layout = sb->layout;
mddev->raid_disks = sb->raid_disks;
- mddev->dev_sectors = sb->size * 2;
+ mddev->dev_sectors = ((sector_t)sb->size) * 2;
mddev->events = ev1;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9;
/*
* sync_super for 0.90.0
*/
-static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev)
{
mdp_super_t *sb;
- mdk_rdev_t *rdev2;
+ struct md_rdev *rdev2;
int next_spare = mddev->raid_disks;
rdev->sb_size = MD_SB_BYTES;
- sb = (mdp_super_t*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
memset(sb, 0, sizeof(*sb));
sb->state |= (1<<MD_SB_BITMAP_PRESENT);
sb->disks[0].state = (1<<MD_DISK_REMOVED);
- list_for_each_entry(rdev2, &mddev->disks, same_set) {
+ rdev_for_each(rdev2, mddev) {
mdp_disk_t *d;
int desc_nr;
int is_active = test_bit(In_sync, &rdev2->flags);
* rdev_size_change for 0.90.0
*/
static unsigned long long
-super_90_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
+super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
if (num_sectors && num_sectors < rdev->mddev->dev_sectors)
return 0; /* component must fit device */
rdev->sb_start = calc_dev_sboffset(rdev);
if (!num_sectors || num_sectors > rdev->sb_start)
num_sectors = rdev->sb_start;
+ /* Limit to 4TB as metadata cannot record more than that.
+ * 4TB == 2^32 KB, or 2*2^32 sectors.
+ */
+ if (num_sectors >= (2ULL << 32))
+ num_sectors = (2ULL << 32) - 2;
md_super_write(rdev->mddev, rdev, rdev->sb_start, rdev->sb_size,
rdev->sb_page);
md_super_wait(rdev->mddev);
return cpu_to_le32(csum);
}
-static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version)
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+ int acknowledged);
+static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_version)
{
struct mdp_superblock_1 *sb;
int ret;
if (ret) return ret;
- sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
if (sb->magic != cpu_to_le32(MD_SB_MAGIC) ||
sb->major_version != cpu_to_le32(1) ||
else
rdev->desc_nr = le32_to_cpu(sb->dev_number);
+ if (!rdev->bb_page) {
+ rdev->bb_page = alloc_page(GFP_KERNEL);
+ if (!rdev->bb_page)
+ return -ENOMEM;
+ }
+ if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BAD_BLOCKS) &&
+ rdev->badblocks.count == 0) {
+ /* need to load the bad block list.
+ * Currently we limit it to one page.
+ */
+ s32 offset;
+ sector_t bb_sector;
+ u64 *bbp;
+ int i;
+ int sectors = le16_to_cpu(sb->bblog_size);
+ if (sectors > (PAGE_SIZE / 512))
+ return -EINVAL;
+ offset = le32_to_cpu(sb->bblog_offset);
+ if (offset == 0)
+ return -EINVAL;
+ bb_sector = (long long)offset;
+ if (!sync_page_io(rdev, bb_sector, sectors << 9,
+ rdev->bb_page, READ, true))
+ return -EIO;
+ bbp = (u64 *)page_address(rdev->bb_page);
+ rdev->badblocks.shift = sb->bblog_shift;
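+ /* each little-endian u64 packs the start sector in bits 63:10
+ * and the extent length in bits 9:0, both scaled by bblog_shift;
+ * an all-ones entry terminates the list
+ */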
+ for (i = 0 ; i < (sectors << (9-3)) ; i++, bbp++) {
+ u64 bb = le64_to_cpu(*bbp);
+ int count = bb & (0x3ff);
+ u64 sector = bb >> 10;
+ sector <<= sb->bblog_shift;
+ count <<= sb->bblog_shift;
+ if (bb + 1 == 0)
+ break;
+ if (md_set_badblocks(&rdev->badblocks,
+ sector, count, 1) == 0)
+ return -EINVAL;
+ }
+ } else if (sb->bblog_offset == 0)
+ rdev->badblocks.shift = -1;
+
if (!refdev) {
ret = 1;
} else {
__u64 ev1, ev2;
- struct mdp_superblock_1 *refsb =
- (struct mdp_superblock_1*)page_address(refdev->sb_page);
+ struct mdp_superblock_1 *refsb = page_address(refdev->sb_page);
if (memcmp(sb->set_uuid, refsb->set_uuid, 16) != 0 ||
sb->level != refsb->level ||
return ret;
}
-static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
+static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev)
{
- struct mdp_superblock_1 *sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ struct mdp_superblock_1 *sb = page_address(rdev->sb_page);
__u64 ev1 = le64_to_cpu(sb->events);
rdev->raid_disk = -1;
}
if (sb->devflags & WriteMostly1)
set_bit(WriteMostly, &rdev->flags);
+ if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT)
+ set_bit(Replacement, &rdev->flags);
} else /* MULTIPATH are always insync */
set_bit(In_sync, &rdev->flags);
return 0;
}
-static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev)
+static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
{
struct mdp_superblock_1 *sb;
- mdk_rdev_t *rdev2;
+ struct md_rdev *rdev2;
int max_dev, i;
/* make rdev->sb match mddev and rdev data. */
- sb = (struct mdp_superblock_1*)page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
sb->feature_map = 0;
sb->pad0 = 0;
sb->recovery_offset = cpu_to_le64(0);
memset(sb->pad1, 0, sizeof(sb->pad1));
- memset(sb->pad2, 0, sizeof(sb->pad2));
memset(sb->pad3, 0, sizeof(sb->pad3));
sb->utime = cpu_to_le64((__u64)mddev->utime);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
+ if (test_bit(WriteMostly, &rdev->flags))
+ sb->devflags |= WriteMostly1;
+ else
+ sb->devflags &= ~WriteMostly1;
+
if (mddev->bitmap && mddev->bitmap_info.file == NULL) {
sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset);
sb->feature_map = cpu_to_le32(MD_FEATURE_BITMAP_OFFSET);
sb->recovery_offset =
cpu_to_le64(rdev->recovery_offset);
}
+ if (test_bit(Replacement, &rdev->flags))
+ sb->feature_map |=
+ cpu_to_le32(MD_FEATURE_REPLACEMENT);
if (mddev->reshape_position != MaxSector) {
sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_ACTIVE);
sb->new_chunk = cpu_to_le32(mddev->new_chunk_sectors);
}
+ if (rdev->badblocks.count == 0)
+ /* Nothing to do for bad blocks */ ;
+ else if (sb->bblog_offset == 0)
+ /* Cannot record bad blocks on this device */
+ md_error(mddev, rdev);
+ else {
+ struct badblocks *bb = &rdev->badblocks;
+ u64 *bbp = (u64 *)page_address(rdev->bb_page);
+ u64 *p = bb->page;
+ sb->feature_map |= cpu_to_le32(MD_FEATURE_BAD_BLOCKS);
+ if (bb->changed) {
+ unsigned seq;
+
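+ /* copy the table under a seqlock read loop: if a writer
+ * updates it mid-copy, read_seqretry() notices and the
+ * on-disk page is rebuilt from scratch
+ */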
+retry:
+ seq = read_seqbegin(&bb->lock);
+
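+ /* pad with 0xff so unused slots read back as all-ones,
+ * which super_1_load treats as end-of-list
+ */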
+ memset(bbp, 0xff, PAGE_SIZE);
+
+ for (i = 0 ; i < bb->count ; i++) {
+ u64 internal_bb = *p++;
+ u64 store_bb = ((BB_OFFSET(internal_bb) << 10)
+ | BB_LEN(internal_bb));
+ *bbp++ = cpu_to_le64(store_bb);
+ }
+ bb->changed = 0;
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ bb->sector = (rdev->sb_start +
+ (int)le32_to_cpu(sb->bblog_offset));
+ bb->size = le16_to_cpu(sb->bblog_size);
+ }
+ }
+
max_dev = 0;
- list_for_each_entry(rdev2, &mddev->disks, same_set)
+ rdev_for_each(rdev2, mddev)
if (rdev2->desc_nr+1 > max_dev)
max_dev = rdev2->desc_nr+1;
for (i=0; i<max_dev;i++)
sb->dev_roles[i] = cpu_to_le16(0xfffe);
- list_for_each_entry(rdev2, &mddev->disks, same_set) {
+ rdev_for_each(rdev2, mddev) {
i = rdev2->desc_nr;
if (test_bit(Faulty, &rdev2->flags))
sb->dev_roles[i] = cpu_to_le16(0xfffe);
}
static unsigned long long
-super_1_rdev_size_change(mdk_rdev_t *rdev, sector_t num_sectors)
+super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors)
{
struct mdp_superblock_1 *sb;
sector_t max_sectors;
num_sectors = max_sectors;
rdev->sb_start = sb_start;
}
- sb = (struct mdp_superblock_1 *) page_address(rdev->sb_page);
+ sb = page_address(rdev->sb_page);
sb->data_size = cpu_to_le64(num_sectors);
sb->super_offset = rdev->sb_start;
sb->sb_csum = calc_sb_1_csum(sb);
},
};
-static void sync_super(mddev_t *mddev, mdk_rdev_t *rdev)
+static void sync_super(struct mddev *mddev, struct md_rdev *rdev)
{
if (mddev->sync_super) {
mddev->sync_super(mddev, rdev);
super_types[mddev->major_version].sync_super(mddev, rdev);
}
-static int match_mddev_units(mddev_t *mddev1, mddev_t *mddev2)
+static int match_mddev_units(struct mddev *mddev1, struct mddev *mddev2)
{
- mdk_rdev_t *rdev, *rdev2;
+ struct md_rdev *rdev, *rdev2;
rcu_read_lock();
rdev_for_each_rcu(rdev, mddev1)
* from the array. It only succeeds if all working and active component devices
* are integrity capable with matching profiles.
*/
-int md_integrity_register(mddev_t *mddev)
+int md_integrity_register(struct mddev *mddev)
{
- mdk_rdev_t *rdev, *reference = NULL;
+ struct md_rdev *rdev, *reference = NULL;
if (list_empty(&mddev->disks))
return 0; /* nothing to do */
if (!mddev->gendisk || blk_get_integrity(mddev->gendisk))
return 0; /* shouldn't register, or already is */
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
/* skip spares and non-functional disks */
if (test_bit(Faulty, &rdev->flags))
continue;
EXPORT_SYMBOL(md_integrity_register);
/* Disable data integrity if non-capable/non-matching disk is being added */
-void md_integrity_add_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+void md_integrity_add_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
struct blk_integrity *bi_rdev = bdev_get_integrity(rdev->bdev);
struct blk_integrity *bi_mddev = blk_get_integrity(mddev->gendisk);
}
EXPORT_SYMBOL(md_integrity_add_rdev);
-static int bind_rdev_to_array(mdk_rdev_t * rdev, mddev_t * mddev)
+static int bind_rdev_to_array(struct md_rdev * rdev, struct mddev * mddev)
{
char b[BDEVNAME_SIZE];
struct kobject *ko;
static void md_delayed_delete(struct work_struct *ws)
{
- mdk_rdev_t *rdev = container_of(ws, mdk_rdev_t, del_work);
+ struct md_rdev *rdev = container_of(ws, struct md_rdev, del_work);
kobject_del(&rdev->kobj);
kobject_put(&rdev->kobj);
}
-static void unbind_rdev_from_array(mdk_rdev_t * rdev)
+static void unbind_rdev_from_array(struct md_rdev * rdev)
{
char b[BDEVNAME_SIZE];
if (!rdev->mddev) {
sysfs_remove_link(&rdev->kobj, "block");
sysfs_put(rdev->sysfs_state);
rdev->sysfs_state = NULL;
+ kfree(rdev->badblocks.page);
+ rdev->badblocks.count = 0;
+ rdev->badblocks.page = NULL;
/* We need to delay this, otherwise we can deadlock when
* writing 'remove' to "dev/state". We also need
* to delay it due to rcu usage.
* otherwise reused by a RAID array (or any other kernel
* subsystem), by bd_claiming the device.
*/
-static int lock_rdev(mdk_rdev_t *rdev, dev_t dev, int shared)
+static int lock_rdev(struct md_rdev *rdev, dev_t dev, int shared)
{
int err = 0;
struct block_device *bdev;
char b[BDEVNAME_SIZE];
bdev = blkdev_get_by_dev(dev, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
- shared ? (mdk_rdev_t *)lock_rdev : rdev);
+ shared ? (struct md_rdev *)lock_rdev : rdev);
if (IS_ERR(bdev)) {
printk(KERN_ERR "md: could not open %s.\n",
__bdevname(dev, b));
return err;
}
-static void unlock_rdev(mdk_rdev_t *rdev)
+static void unlock_rdev(struct md_rdev *rdev)
{
struct block_device *bdev = rdev->bdev;
rdev->bdev = NULL;
void md_autodetect_dev(dev_t dev);
-static void export_rdev(mdk_rdev_t * rdev)
+static void export_rdev(struct md_rdev * rdev)
{
char b[BDEVNAME_SIZE];
printk(KERN_INFO "md: export_rdev(%s)\n",
kobject_put(&rdev->kobj);
}
-static void kick_rdev_from_array(mdk_rdev_t * rdev)
+static void kick_rdev_from_array(struct md_rdev * rdev)
{
unbind_rdev_from_array(rdev);
export_rdev(rdev);
}
-static void export_array(mddev_t *mddev)
+static void export_array(struct mddev *mddev)
{
- mdk_rdev_t *rdev, *tmp;
+ struct md_rdev *rdev, *tmp;
- rdev_for_each(rdev, tmp, mddev) {
+ rdev_for_each_safe(rdev, tmp, mddev) {
if (!rdev->mddev) {
MD_BUG();
continue;
);
}
-static void print_rdev(mdk_rdev_t *rdev, int major_version)
+static void print_rdev(struct md_rdev *rdev, int major_version)
{
char b[BDEVNAME_SIZE];
printk(KERN_INFO "md: rdev %s, Sect:%08llu F:%d S:%d DN:%u\n",
printk(KERN_INFO "md: rdev superblock (MJ:%d):\n", major_version);
switch (major_version) {
case 0:
- print_sb_90((mdp_super_t*)page_address(rdev->sb_page));
+ print_sb_90(page_address(rdev->sb_page));
break;
case 1:
- print_sb_1((struct mdp_superblock_1 *)page_address(rdev->sb_page));
+ print_sb_1(page_address(rdev->sb_page));
break;
}
} else
static void md_print_devices(void)
{
struct list_head *tmp;
- mdk_rdev_t *rdev;
- mddev_t *mddev;
+ struct md_rdev *rdev;
+ struct mddev *mddev;
char b[BDEVNAME_SIZE];
printk("\n");
bitmap_print_sb(mddev->bitmap);
else
printk("%s: ", mdname(mddev));
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
printk("<%s>", bdevname(rdev->bdev,b));
printk("\n");
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
print_rdev(rdev, mddev->major_version);
}
printk("md: **********************************\n");
}
-static void sync_sbs(mddev_t * mddev, int nospares)
+static void sync_sbs(struct mddev * mddev, int nospares)
{
/* Update each superblock (in-memory image), but
* if we are allowed to, skip spares which already
* (which would mean they aren't being marked as dirty
* with the rest of the array)
*/
- mdk_rdev_t *rdev;
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ struct md_rdev *rdev;
+ rdev_for_each(rdev, mddev) {
if (rdev->sb_events == mddev->events ||
(nospares &&
rdev->raid_disk < 0 &&
}
}
-static void md_update_sb(mddev_t * mddev, int force_change)
+static void md_update_sb(struct mddev * mddev, int force_change)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int sync_req;
int nospares = 0;
+ int any_badblocks_changed = 0;
repeat:
/* First make sure individual recovery_offsets are correct */
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
if (rdev->raid_disk >= 0 &&
mddev->delta_disks >= 0 &&
!test_bit(In_sync, &rdev->flags) &&
if (!mddev->persistent) {
clear_bit(MD_CHANGE_CLEAN, &mddev->flags);
clear_bit(MD_CHANGE_DEVS, &mddev->flags);
- if (!mddev->external)
+ if (!mddev->external) {
clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+ rdev_for_each(rdev, mddev) {
+ if (rdev->badblocks.changed) {
+ rdev->badblocks.changed = 0;
+ md_ack_all_badblocks(&rdev->badblocks);
+ md_error(mddev, rdev);
+ }
+ clear_bit(Blocked, &rdev->flags);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ }
+ }
wake_up(&mddev->sb_wait);
return;
}
MD_BUG();
mddev->events --;
}
+
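+ /* note which devices had a fault or bad-block change before the
+ * write-out, so Blocked is only cleared once that state is safely
+ * recorded (see the FaultRecorded handling after the writes)
+ */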
+ rdev_for_each(rdev, mddev) {
+ if (rdev->badblocks.changed)
+ any_badblocks_changed++;
+ if (test_bit(Faulty, &rdev->flags))
+ set_bit(FaultRecorded, &rdev->flags);
+ }
+
sync_sbs(mddev, nospares);
spin_unlock_irq(&mddev->write_lock);
- dprintk(KERN_INFO
- "md: updating %s RAID superblock on device (in sync %d)\n",
- mdname(mddev),mddev->in_sync);
+ pr_debug("md: updating %s RAID superblock on device (in sync %d)\n",
+ mdname(mddev), mddev->in_sync);
bitmap_update_sb(mddev->bitmap);
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
- dprintk(KERN_INFO "md: ");
+
if (rdev->sb_loaded != 1)
continue; /* no noise on spare devices */
- if (test_bit(Faulty, &rdev->flags))
- dprintk("(skipping faulty ");
- dprintk("%s ", bdevname(rdev->bdev,b));
- if (!test_bit(Faulty, &rdev->flags)) {
+ if (!test_bit(Faulty, &rdev->flags) &&
+ rdev->saved_raid_disk == -1) {
md_super_write(mddev,rdev,
rdev->sb_start, rdev->sb_size,
rdev->sb_page);
- dprintk(KERN_INFO "(write) %s's sb offset: %llu\n",
- bdevname(rdev->bdev,b),
- (unsigned long long)rdev->sb_start);
+ pr_debug("md: (write) %s's sb offset: %llu\n",
+ bdevname(rdev->bdev, b),
+ (unsigned long long)rdev->sb_start);
rdev->sb_events = mddev->events;
+ if (rdev->badblocks.size) {
+ md_super_write(mddev, rdev,
+ rdev->badblocks.sector,
+ rdev->badblocks.size << 9,
+ rdev->bb_page);
+ rdev->badblocks.size = 0;
+ }
+
- } else
- dprintk(")\n");
+ } else if (test_bit(Faulty, &rdev->flags))
+ pr_debug("md: %s (skipping faulty)\n",
+ bdevname(rdev->bdev, b));
+ else
+ pr_debug("md: %s (skipping incremental s/r)\n",
+ bdevname(rdev->bdev, b));
if (mddev->level == LEVEL_MULTIPATH)
/* only need to write one superblock... */
break;
if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
sysfs_notify(&mddev->kobj, NULL, "sync_completed");
+ rdev_for_each(rdev, mddev) {
+ if (test_and_clear_bit(FaultRecorded, &rdev->flags))
+ clear_bit(Blocked, &rdev->flags);
+
+ if (any_badblocks_changed)
+ md_ack_all_badblocks(&rdev->badblocks);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
+ wake_up(&rdev->blocked_wait);
+ }
}
/* words written to sysfs files may, or may not, be \n terminated.
struct rdev_sysfs_entry {
struct attribute attr;
- ssize_t (*show)(mdk_rdev_t *, char *);
- ssize_t (*store)(mdk_rdev_t *, const char *, size_t);
+ ssize_t (*show)(struct md_rdev *, char *);
+ ssize_t (*store)(struct md_rdev *, const char *, size_t);
};
static ssize_t
-state_show(mdk_rdev_t *rdev, char *page)
+state_show(struct md_rdev *rdev, char *page)
{
char *sep = "";
size_t len = 0;
- if (test_bit(Faulty, &rdev->flags)) {
+ if (test_bit(Faulty, &rdev->flags) ||
+ rdev->badblocks.unacked_exist) {
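+ /* a device with unacknowledged bad blocks is reported as faulty
+ * (and blocked, below) so that metadata handlers which cannot
+ * record bad blocks will fail it
+ */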
len+= sprintf(page+len, "%sfaulty",sep);
sep = ",";
}
len += sprintf(page+len, "%swrite_mostly",sep);
sep = ",";
}
- if (test_bit(Blocked, &rdev->flags)) {
+ if (test_bit(Blocked, &rdev->flags) ||
+ (rdev->badblocks.unacked_exist
+ && !test_bit(Faulty, &rdev->flags))) {
len += sprintf(page+len, "%sblocked", sep);
sep = ",";
}
len += sprintf(page+len, "%sspare", sep);
sep = ",";
}
+ if (test_bit(WriteErrorSeen, &rdev->flags)) {
+ len += sprintf(page+len, "%swrite_error", sep);
+ sep = ",";
+ }
+ if (test_bit(WantReplacement, &rdev->flags)) {
+ len += sprintf(page+len, "%swant_replacement", sep);
+ sep = ",";
+ }
+ if (test_bit(Replacement, &rdev->flags)) {
+ len += sprintf(page+len, "%sreplacement", sep);
+ sep = ",";
+ }
+
return len+sprintf(page+len, "\n");
}
static ssize_t
-state_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+state_store(struct md_rdev *rdev, const char *buf, size_t len)
{
/* can write
- * faulty - simulates and error
+ * faulty - simulates an error
* remove - disconnects the device
* writemostly - sets write_mostly
* -writemostly - clears write_mostly
- * blocked - sets the Blocked flag
- * -blocked - clears the Blocked flag
+ * blocked - sets the Blocked flags
+ * -blocked - clears the Blocked and possibly simulates an error
* insync - sets Insync providing device isn't active
+ * write_error - sets WriteErrorSeen
+ * -write_error - clears WriteErrorSeen
*/
int err = -EINVAL;
if (cmd_match(buf, "faulty") && rdev->mddev->pers) {
md_error(rdev->mddev, rdev);
- err = 0;
+ if (test_bit(Faulty, &rdev->flags))
+ err = 0;
+ else
+ err = -EBUSY;
} else if (cmd_match(buf, "remove")) {
if (rdev->raid_disk >= 0)
err = -EBUSY;
else {
- mddev_t *mddev = rdev->mddev;
+ struct mddev *mddev = rdev->mddev;
kick_rdev_from_array(rdev);
if (mddev->pers)
md_update_sb(mddev, 1);
set_bit(Blocked, &rdev->flags);
err = 0;
} else if (cmd_match(buf, "-blocked")) {
+ if (!test_bit(Faulty, &rdev->flags) &&
+ rdev->badblocks.unacked_exist) {
+ /* metadata handler doesn't understand badblocks,
+ * so we need to fail the device
+ */
+ md_error(rdev->mddev, rdev);
+ }
clear_bit(Blocked, &rdev->flags);
+ clear_bit(BlockedBadBlocks, &rdev->flags);
wake_up(&rdev->blocked_wait);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
} else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) {
set_bit(In_sync, &rdev->flags);
err = 0;
+ } else if (cmd_match(buf, "write_error")) {
+ set_bit(WriteErrorSeen, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "-write_error")) {
+ clear_bit(WriteErrorSeen, &rdev->flags);
+ err = 0;
+ } else if (cmd_match(buf, "want_replacement")) {
+ /* Any non-spare device that is not a replacement can
+ * become want_replacement at any time, but we then need to
+ * check if recovery is needed.
+ */
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(Replacement, &rdev->flags))
+ set_bit(WantReplacement, &rdev->flags);
+ set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
+ md_wakeup_thread(rdev->mddev->thread);
+ err = 0;
+ } else if (cmd_match(buf, "-want_replacement")) {
+ /* Clearing 'want_replacement' is always allowed.
+ * Once replacement starts it is too late though.
+ */
+ err = 0;
+ clear_bit(WantReplacement, &rdev->flags);
+ } else if (cmd_match(buf, "replacement")) {
+ /* Can only set a device as a replacement when array has not
+ * yet been started. Once running, replacement is automatic
+ * from spares, or by assigning 'slot'.
+ */
+ if (rdev->mddev->pers)
+ err = -EBUSY;
+ else {
+ set_bit(Replacement, &rdev->flags);
+ err = 0;
+ }
+ } else if (cmd_match(buf, "-replacement")) {
+ /* Similarly, can only clear Replacement before start */
+ if (rdev->mddev->pers)
+ err = -EBUSY;
+ else {
+ clear_bit(Replacement, &rdev->flags);
+ err = 0;
+ }
}
if (!err)
sysfs_notify_dirent_safe(rdev->sysfs_state);
__ATTR(state, S_IRUGO|S_IWUSR, state_show, state_store);
static ssize_t
-errors_show(mdk_rdev_t *rdev, char *page)
+errors_show(struct md_rdev *rdev, char *page)
{
return sprintf(page, "%d\n", atomic_read(&rdev->corrected_errors));
}
static ssize_t
-errors_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+errors_store(struct md_rdev *rdev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
__ATTR(errors, S_IRUGO|S_IWUSR, errors_show, errors_store);
static ssize_t
-slot_show(mdk_rdev_t *rdev, char *page)
+slot_show(struct md_rdev *rdev, char *page)
{
if (rdev->raid_disk < 0)
return sprintf(page, "none\n");
}
static ssize_t
-slot_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+slot_store(struct md_rdev *rdev, const char *buf, size_t len)
{
char *e;
int err;
if (rdev->mddev->pers->hot_remove_disk == NULL)
return -EINVAL;
err = rdev->mddev->pers->
- hot_remove_disk(rdev->mddev, rdev->raid_disk);
+ hot_remove_disk(rdev->mddev, rdev);
if (err)
return err;
sysfs_unlink_rdev(rdev->mddev, rdev);
set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery);
md_wakeup_thread(rdev->mddev->thread);
} else if (rdev->mddev->pers) {
- mdk_rdev_t *rdev2;
/* Activating a spare .. or possibly reactivating
* if we ever get bitmaps working here.
*/
if (rdev->mddev->pers->hot_add_disk == NULL)
return -EINVAL;
- list_for_each_entry(rdev2, &rdev->mddev->disks, same_set)
- if (rdev2->raid_disk == slot)
- return -EEXIST;
-
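+ /* the old duplicate-slot check is gone: with hot-replace a slot
+ * may legitimately be shared by a device and its replacement
+ */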
if (slot >= rdev->mddev->raid_disks &&
slot >= rdev->mddev->raid_disks + rdev->mddev->delta_disks)
return -ENOSPC;
rdev->saved_raid_disk = slot;
else
rdev->saved_raid_disk = -1;
+ clear_bit(In_sync, &rdev->flags);
err = rdev->mddev->pers->
hot_add_disk(rdev->mddev, rdev);
if (err) {
__ATTR(slot, S_IRUGO|S_IWUSR, slot_show, slot_store);
static ssize_t
-offset_show(mdk_rdev_t *rdev, char *page)
+offset_show(struct md_rdev *rdev, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)rdev->data_offset);
}
static ssize_t
-offset_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+offset_store(struct md_rdev *rdev, const char *buf, size_t len)
{
char *e;
unsigned long long offset = simple_strtoull(buf, &e, 10);
__ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store);
static ssize_t
-rdev_size_show(mdk_rdev_t *rdev, char *page)
+rdev_size_show(struct md_rdev *rdev, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2);
}
}
static ssize_t
-rdev_size_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len)
{
- mddev_t *my_mddev = rdev->mddev;
+ struct mddev *my_mddev = rdev->mddev;
sector_t oldsectors = rdev->sectors;
sector_t sectors;
* a deadlock. We have already changed rdev->sectors, and if
* we have to change it back, we will have the lock again.
*/
- mddev_t *mddev;
+ struct mddev *mddev;
int overlap = 0;
struct list_head *tmp;
mddev_unlock(my_mddev);
for_each_mddev(mddev, tmp) {
- mdk_rdev_t *rdev2;
+ struct md_rdev *rdev2;
mddev_lock(mddev);
- list_for_each_entry(rdev2, &mddev->disks, same_set)
+ rdev_for_each(rdev2, mddev)
if (rdev->bdev == rdev2->bdev &&
rdev != rdev2 &&
overlaps(rdev->data_offset, rdev->sectors,
__ATTR(size, S_IRUGO|S_IWUSR, rdev_size_show, rdev_size_store);
-static ssize_t recovery_start_show(mdk_rdev_t *rdev, char *page)
+static ssize_t recovery_start_show(struct md_rdev *rdev, char *page)
{
unsigned long long recovery_start = rdev->recovery_offset;
return sprintf(page, "%llu\n", recovery_start);
}
-static ssize_t recovery_start_store(mdk_rdev_t *rdev, const char *buf, size_t len)
+static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_t len)
{
unsigned long long recovery_start;
static struct rdev_sysfs_entry rdev_recovery_start =
__ATTR(recovery_start, S_IRUGO|S_IWUSR, recovery_start_show, recovery_start_store);
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack);
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack);
+
+static ssize_t bb_show(struct md_rdev *rdev, char *page)
+{
+ return badblocks_show(&rdev->badblocks, page, 0);
+}
+static ssize_t bb_store(struct md_rdev *rdev, const char *page, size_t len)
+{
+ int rv = badblocks_store(&rdev->badblocks, page, len, 0);
+ /* Maybe that ack was all we needed */
+ if (test_and_clear_bit(BlockedBadBlocks, &rdev->flags))
+ wake_up(&rdev->blocked_wait);
+ return rv;
+}
+static struct rdev_sysfs_entry rdev_bad_blocks =
+__ATTR(bad_blocks, S_IRUGO|S_IWUSR, bb_show, bb_store);
+
+
+static ssize_t ubb_show(struct md_rdev *rdev, char *page)
+{
+ return badblocks_show(&rdev->badblocks, page, 1);
+}
+static ssize_t ubb_store(struct md_rdev *rdev, const char *page, size_t len)
+{
+ return badblocks_store(&rdev->badblocks, page, len, 1);
+}
+static struct rdev_sysfs_entry rdev_unack_bad_blocks =
+__ATTR(unacknowledged_bad_blocks, S_IRUGO|S_IWUSR, ubb_show, ubb_store);
+
static struct attribute *rdev_default_attrs[] = {
&rdev_state.attr,
&rdev_errors.attr,
&rdev_offset.attr,
&rdev_size.attr,
&rdev_recovery_start.attr,
+ &rdev_bad_blocks.attr,
+ &rdev_unack_bad_blocks.attr,
NULL,
};
static ssize_t
rdev_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
- mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
- mddev_t *mddev = rdev->mddev;
+ struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
+ struct mddev *mddev = rdev->mddev;
ssize_t rv;
if (!entry->show)
const char *page, size_t length)
{
struct rdev_sysfs_entry *entry = container_of(attr, struct rdev_sysfs_entry, attr);
- mdk_rdev_t *rdev = container_of(kobj, mdk_rdev_t, kobj);
+ struct md_rdev *rdev = container_of(kobj, struct md_rdev, kobj);
ssize_t rv;
- mddev_t *mddev = rdev->mddev;
+ struct mddev *mddev = rdev->mddev;
if (!entry->store)
return -EIO;
static void rdev_free(struct kobject *ko)
{
- mdk_rdev_t *rdev = container_of(ko, mdk_rdev_t, kobj);
+ struct md_rdev *rdev = container_of(ko, struct md_rdev, kobj);
kfree(rdev);
}
static const struct sysfs_ops rdev_sysfs_ops = {
.default_attrs = rdev_default_attrs,
};
-void md_rdev_init(mdk_rdev_t *rdev)
+int md_rdev_init(struct md_rdev *rdev)
{
rdev->desc_nr = -1;
rdev->saved_raid_disk = -1;
rdev->sb_events = 0;
rdev->last_read_error.tv_sec = 0;
rdev->last_read_error.tv_nsec = 0;
+ rdev->sb_loaded = 0;
+ rdev->bb_page = NULL;
atomic_set(&rdev->nr_pending, 0);
atomic_set(&rdev->read_errors, 0);
atomic_set(&rdev->corrected_errors, 0);
INIT_LIST_HEAD(&rdev->same_set);
init_waitqueue_head(&rdev->blocked_wait);
+
+ /* Add space to store bad block list.
+ * This reserves the space even on arrays where it cannot
+ * be used - I wonder if that matters
+ */
+ rdev->badblocks.count = 0;
+ rdev->badblocks.shift = 0;
+ rdev->badblocks.page = kmalloc(PAGE_SIZE, GFP_KERNEL);
+ seqlock_init(&rdev->badblocks.lock);
+ if (rdev->badblocks.page == NULL)
+ return -ENOMEM;
+
+ return 0;
}
EXPORT_SYMBOL_GPL(md_rdev_init);
/*
*
* a faulty rdev _never_ has rdev->sb set.
*/
-static mdk_rdev_t *md_import_device(dev_t newdev, int super_format, int super_minor)
+static struct md_rdev *md_import_device(dev_t newdev, int super_format, int super_minor)
{
char b[BDEVNAME_SIZE];
int err;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
sector_t size;
rdev = kzalloc(sizeof(*rdev), GFP_KERNEL);
return ERR_PTR(-ENOMEM);
}
- md_rdev_init(rdev);
- if ((err = alloc_disk_sb(rdev)))
+ err = md_rdev_init(rdev);
+ if (err)
+ goto abort_free;
+ err = alloc_disk_sb(rdev);
+ if (err)
goto abort_free;
err = lock_rdev(rdev, newdev, super_format == -2);
goto abort_free;
}
}
+ if (super_format == -1)
+ /* hot-add for 0.90, or non-persistent: so no badblocks */
+ rdev->badblocks.shift = -1;
return rdev;
abort_free:
- if (rdev->sb_page) {
- if (rdev->bdev)
- unlock_rdev(rdev);
- free_disk_sb(rdev);
- }
+ if (rdev->bdev)
+ unlock_rdev(rdev);
+ free_disk_sb(rdev);
+ kfree(rdev->badblocks.page);
kfree(rdev);
return ERR_PTR(err);
}
*/
-static void analyze_sbs(mddev_t * mddev)
+static void analyze_sbs(struct mddev * mddev)
{
int i;
- mdk_rdev_t *rdev, *freshest, *tmp;
+ struct md_rdev *rdev, *freshest, *tmp;
char b[BDEVNAME_SIZE];
freshest = NULL;
- rdev_for_each(rdev, tmp, mddev)
+ rdev_for_each_safe(rdev, tmp, mddev)
switch (super_types[mddev->major_version].
load_super(rdev, freshest, mddev->minor_version)) {
case 1:
validate_super(mddev, freshest);
i = 0;
- rdev_for_each(rdev, tmp, mddev) {
+ rdev_for_each_safe(rdev, tmp, mddev) {
if (mddev->max_disks &&
(rdev->desc_nr >= mddev->max_disks ||
i > mddev->max_disks)) {
static void md_safemode_timeout(unsigned long data);
static ssize_t
-safe_delay_show(mddev_t *mddev, char *page)
+safe_delay_show(struct mddev *mddev, char *page)
{
int msec = (mddev->safemode_delay*1000)/HZ;
return sprintf(page, "%d.%03d\n", msec/1000, msec%1000);
}
static ssize_t
-safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
+safe_delay_store(struct mddev *mddev, const char *cbuf, size_t len)
{
unsigned long msec;
__ATTR(safe_mode_delay, S_IRUGO|S_IWUSR,safe_delay_show, safe_delay_store);
static ssize_t
-level_show(mddev_t *mddev, char *page)
+level_show(struct mddev *mddev, char *page)
{
- struct mdk_personality *p = mddev->pers;
+ struct md_personality *p = mddev->pers;
if (p)
return sprintf(page, "%s\n", p->name);
else if (mddev->clevel[0])
}
static ssize_t
-level_store(mddev_t *mddev, const char *buf, size_t len)
+level_store(struct mddev *mddev, const char *buf, size_t len)
{
char clevel[16];
ssize_t rv = len;
- struct mdk_personality *pers;
+ struct md_personality *pers;
long level;
void *priv;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
if (mddev->pers == NULL) {
if (len == 0)
return -EINVAL;
}
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
rdev->new_raid_disk = rdev->raid_disk;
/* ->takeover must set new_* and/or delta_disks
mddev->safemode = 0;
}
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
if (rdev->raid_disk < 0)
continue;
if (rdev->new_raid_disk >= mddev->raid_disks)
continue;
sysfs_unlink_rdev(mddev, rdev);
}
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
if (rdev->raid_disk < 0)
continue;
if (rdev->new_raid_disk == rdev->raid_disk)
static ssize_t
-layout_show(mddev_t *mddev, char *page)
+layout_show(struct mddev *mddev, char *page)
{
/* just a number, not meaningful for all levels */
if (mddev->reshape_position != MaxSector &&
}
static ssize_t
-layout_store(mddev_t *mddev, const char *buf, size_t len)
+layout_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
static ssize_t
-raid_disks_show(mddev_t *mddev, char *page)
+raid_disks_show(struct mddev *mddev, char *page)
{
if (mddev->raid_disks == 0)
return 0;
return sprintf(page, "%d\n", mddev->raid_disks);
}
-static int update_raid_disks(mddev_t *mddev, int raid_disks);
+static int update_raid_disks(struct mddev *mddev, int raid_disks);
static ssize_t
-raid_disks_store(mddev_t *mddev, const char *buf, size_t len)
+raid_disks_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
int rv = 0;
__ATTR(raid_disks, S_IRUGO|S_IWUSR, raid_disks_show, raid_disks_store);
static ssize_t
-chunk_size_show(mddev_t *mddev, char *page)
+chunk_size_show(struct mddev *mddev, char *page)
{
if (mddev->reshape_position != MaxSector &&
mddev->chunk_sectors != mddev->new_chunk_sectors)
}
static ssize_t
-chunk_size_store(mddev_t *mddev, const char *buf, size_t len)
+chunk_size_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
__ATTR(chunk_size, S_IRUGO|S_IWUSR, chunk_size_show, chunk_size_store);
static ssize_t
-resync_start_show(mddev_t *mddev, char *page)
+resync_start_show(struct mddev *mddev, char *page)
{
if (mddev->recovery_cp == MaxSector)
return sprintf(page, "none\n");
}
static ssize_t
-resync_start_store(mddev_t *mddev, const char *buf, size_t len)
+resync_start_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long n = simple_strtoull(buf, &e, 10);
}
static ssize_t
-array_state_show(mddev_t *mddev, char *page)
+array_state_show(struct mddev *mddev, char *page)
{
enum array_state st = inactive;
return sprintf(page, "%s\n", array_states[st]);
}
-static int do_md_stop(mddev_t * mddev, int ro, int is_open);
-static int md_set_readonly(mddev_t * mddev, int is_open);
-static int do_md_run(mddev_t * mddev);
-static int restart_array(mddev_t *mddev);
+static int do_md_stop(struct mddev * mddev, int ro, int is_open);
+static int md_set_readonly(struct mddev * mddev, int is_open);
+static int do_md_run(struct mddev * mddev);
+static int restart_array(struct mddev *mddev);
static ssize_t
-array_state_store(mddev_t *mddev, const char *buf, size_t len)
+array_state_store(struct mddev *mddev, const char *buf, size_t len)
{
int err = -EINVAL;
enum array_state st = match_word(buf, array_states);
if (err)
return err;
else {
+ if (mddev->hold_active == UNTIL_IOCTL)
+ mddev->hold_active = 0;
sysfs_notify_dirent_safe(mddev->sysfs_state);
return len;
}
__ATTR(array_state, S_IRUGO|S_IWUSR, array_state_show, array_state_store);
static ssize_t
-max_corrected_read_errors_show(mddev_t *mddev, char *page) {
+max_corrected_read_errors_show(struct mddev *mddev, char *page) {
return sprintf(page, "%d\n",
atomic_read(&mddev->max_corr_read_errors));
}
static ssize_t
-max_corrected_read_errors_store(mddev_t *mddev, const char *buf, size_t len)
+max_corrected_read_errors_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long n = simple_strtoul(buf, &e, 10);
max_corrected_read_errors_store);
static ssize_t
-null_show(mddev_t *mddev, char *page)
+null_show(struct mddev *mddev, char *page)
{
return -EINVAL;
}
static ssize_t
-new_dev_store(mddev_t *mddev, const char *buf, size_t len)
+new_dev_store(struct mddev *mddev, const char *buf, size_t len)
{
/* buf must be %d:%d\n? giving major and minor numbers */
/* The new device is added to the array.
int major = simple_strtoul(buf, &e, 10);
int minor;
dev_t dev;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int err;
if (!*buf || *e != ':' || !e[1] || e[1] == '\n')
rdev = md_import_device(dev, mddev->major_version,
mddev->minor_version);
if (!IS_ERR(rdev) && !list_empty(&mddev->disks)) {
- mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
- mdk_rdev_t, same_set);
+ struct md_rdev *rdev0
+ = list_entry(mddev->disks.next,
+ struct md_rdev, same_set);
err = super_types[mddev->major_version]
.load_super(rdev, rdev0, mddev->minor_version);
if (err < 0)
__ATTR(new_dev, S_IWUSR, null_show, new_dev_store);
static ssize_t
-bitmap_store(mddev_t *mddev, const char *buf, size_t len)
+bitmap_store(struct mddev *mddev, const char *buf, size_t len)
{
char *end;
unsigned long chunk, end_chunk;
__ATTR(bitmap_set_bits, S_IWUSR, null_show, bitmap_store);
static ssize_t
-size_show(mddev_t *mddev, char *page)
+size_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)mddev->dev_sectors / 2);
}
-static int update_size(mddev_t *mddev, sector_t num_sectors);
+static int update_size(struct mddev *mddev, sector_t num_sectors);
static ssize_t
-size_store(mddev_t *mddev, const char *buf, size_t len)
+size_store(struct mddev *mddev, const char *buf, size_t len)
{
/* If array is inactive, we can reduce the component size, but
* not increase it (except from 0).
* or N.M for internally known formats
*/
static ssize_t
-metadata_show(mddev_t *mddev, char *page)
+metadata_show(struct mddev *mddev, char *page)
{
if (mddev->persistent)
return sprintf(page, "%d.%d\n",
}
static ssize_t
-metadata_store(mddev_t *mddev, const char *buf, size_t len)
+metadata_store(struct mddev *mddev, const char *buf, size_t len)
{
int major, minor;
char *e;
__ATTR(metadata_version, S_IRUGO|S_IWUSR, metadata_show, metadata_store);
static ssize_t
-action_show(mddev_t *mddev, char *page)
+action_show(struct mddev *mddev, char *page)
{
char *type = "idle";
if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
return sprintf(page, "%s\n", type);
}
-static void reap_sync_thread(mddev_t *mddev);
+static void reap_sync_thread(struct mddev *mddev);
static ssize_t
-action_store(mddev_t *mddev, const char *page, size_t len)
+action_store(struct mddev *mddev, const char *page, size_t len)
{
if (!mddev->pers || !mddev->pers->sync_request)
return -EINVAL;
}
static ssize_t
-mismatch_cnt_show(mddev_t *mddev, char *page)
+mismatch_cnt_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long) mddev->resync_mismatches);
static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
static ssize_t
-sync_min_show(mddev_t *mddev, char *page)
+sync_min_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d (%s)\n", speed_min(mddev),
mddev->sync_speed_min ? "local": "system");
}
static ssize_t
-sync_min_store(mddev_t *mddev, const char *buf, size_t len)
+sync_min_store(struct mddev *mddev, const char *buf, size_t len)
{
int min;
char *e;
__ATTR(sync_speed_min, S_IRUGO|S_IWUSR, sync_min_show, sync_min_store);
static ssize_t
-sync_max_show(mddev_t *mddev, char *page)
+sync_max_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d (%s)\n", speed_max(mddev),
mddev->sync_speed_max ? "local": "system");
}
static ssize_t
-sync_max_store(mddev_t *mddev, const char *buf, size_t len)
+sync_max_store(struct mddev *mddev, const char *buf, size_t len)
{
int max;
char *e;
__ATTR(sync_speed_max, S_IRUGO|S_IWUSR, sync_max_show, sync_max_store);
static ssize_t
-degraded_show(mddev_t *mddev, char *page)
+degraded_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d\n", mddev->degraded);
}
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
-sync_force_parallel_show(mddev_t *mddev, char *page)
+sync_force_parallel_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%d\n", mddev->parallel_resync);
}
static ssize_t
-sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
{
long n;
sync_force_parallel_show, sync_force_parallel_store);
static ssize_t
-sync_speed_show(mddev_t *mddev, char *page)
+sync_speed_show(struct mddev *mddev, char *page)
{
unsigned long resync, dt, db;
if (mddev->curr_resync == 0)
static struct md_sysfs_entry md_sync_speed = __ATTR_RO(sync_speed);
static ssize_t
-sync_completed_show(mddev_t *mddev, char *page)
+sync_completed_show(struct mddev *mddev, char *page)
{
unsigned long long max_sectors, resync;
static struct md_sysfs_entry md_sync_completed = __ATTR_RO(sync_completed);
static ssize_t
-min_sync_show(mddev_t *mddev, char *page)
+min_sync_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%llu\n",
(unsigned long long)mddev->resync_min);
}
static ssize_t
-min_sync_store(mddev_t *mddev, const char *buf, size_t len)
+min_sync_store(struct mddev *mddev, const char *buf, size_t len)
{
unsigned long long min;
if (strict_strtoull(buf, 10, &min))
__ATTR(sync_min, S_IRUGO|S_IWUSR, min_sync_show, min_sync_store);
static ssize_t
-max_sync_show(mddev_t *mddev, char *page)
+max_sync_show(struct mddev *mddev, char *page)
{
if (mddev->resync_max == MaxSector)
return sprintf(page, "max\n");
(unsigned long long)mddev->resync_max);
}
static ssize_t
-max_sync_store(mddev_t *mddev, const char *buf, size_t len)
+max_sync_store(struct mddev *mddev, const char *buf, size_t len)
{
if (strncmp(buf, "max", 3) == 0)
mddev->resync_max = MaxSector;
__ATTR(sync_max, S_IRUGO|S_IWUSR, max_sync_show, max_sync_store);
static ssize_t
-suspend_lo_show(mddev_t *mddev, char *page)
+suspend_lo_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_lo);
}
static ssize_t
-suspend_lo_store(mddev_t *mddev, const char *buf, size_t len)
+suspend_lo_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
static ssize_t
-suspend_hi_show(mddev_t *mddev, char *page)
+suspend_hi_show(struct mddev *mddev, char *page)
{
return sprintf(page, "%llu\n", (unsigned long long)mddev->suspend_hi);
}
static ssize_t
-suspend_hi_store(mddev_t *mddev, const char *buf, size_t len)
+suspend_hi_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
__ATTR(suspend_hi, S_IRUGO|S_IWUSR, suspend_hi_show, suspend_hi_store);
static ssize_t
-reshape_position_show(mddev_t *mddev, char *page)
+reshape_position_show(struct mddev *mddev, char *page)
{
if (mddev->reshape_position != MaxSector)
return sprintf(page, "%llu\n",
}
static ssize_t
-reshape_position_store(mddev_t *mddev, const char *buf, size_t len)
+reshape_position_store(struct mddev *mddev, const char *buf, size_t len)
{
char *e;
unsigned long long new = simple_strtoull(buf, &e, 10);
reshape_position_store);
static ssize_t
-array_size_show(mddev_t *mddev, char *page)
+array_size_show(struct mddev *mddev, char *page)
{
if (mddev->external_size)
return sprintf(page, "%llu\n",
}
static ssize_t
-array_size_store(mddev_t *mddev, const char *buf, size_t len)
+array_size_store(struct mddev *mddev, const char *buf, size_t len)
{
sector_t sectors;
md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
{
struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
- mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+ struct mddev *mddev = container_of(kobj, struct mddev, kobj);
ssize_t rv;
if (!entry->show)
return -EIO;
+ spin_lock(&all_mddevs_lock);
+ if (list_empty(&mddev->all_mddevs)) {
+ spin_unlock(&all_mddevs_lock);
+ return -EBUSY;
+ }
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
+
rv = mddev_lock(mddev);
if (!rv) {
rv = entry->show(mddev, page);
mddev_unlock(mddev);
}
+ mddev_put(mddev);
return rv;
}
const char *page, size_t length)
{
struct md_sysfs_entry *entry = container_of(attr, struct md_sysfs_entry, attr);
- mddev_t *mddev = container_of(kobj, struct mddev_s, kobj);
+ struct mddev *mddev = container_of(kobj, struct mddev, kobj);
ssize_t rv;
if (!entry->store)
return -EIO;
if (!capable(CAP_SYS_ADMIN))
return -EACCES;
+ spin_lock(&all_mddevs_lock);
+ if (list_empty(&mddev->all_mddevs)) {
+ spin_unlock(&all_mddevs_lock);
+ return -EBUSY;
+ }
+ mddev_get(mddev);
+ spin_unlock(&all_mddevs_lock);
rv = mddev_lock(mddev);
- if (mddev->hold_active == UNTIL_IOCTL)
- mddev->hold_active = 0;
if (!rv) {
rv = entry->store(mddev, page, length);
mddev_unlock(mddev);
}
+ mddev_put(mddev);
return rv;
}
static void md_free(struct kobject *ko)
{
- mddev_t *mddev = container_of(ko, mddev_t, kobj);
+ struct mddev *mddev = container_of(ko, struct mddev, kobj);
if (mddev->sysfs_state)
sysfs_put(mddev->sysfs_state);
static void mddev_delayed_delete(struct work_struct *ws)
{
- mddev_t *mddev = container_of(ws, mddev_t, del_work);
+ struct mddev *mddev = container_of(ws, struct mddev, del_work);
sysfs_remove_group(&mddev->kobj, &md_bitmap_group);
kobject_del(&mddev->kobj);
static int md_alloc(dev_t dev, char *name)
{
static DEFINE_MUTEX(disks_mutex);
- mddev_t *mddev = mddev_find(dev);
+ struct mddev *mddev = mddev_find(dev);
struct gendisk *disk;
int partitioned;
int shift;
if (name) {
/* Need to ensure that 'name' is not a duplicate.
*/
- mddev_t *mddev2;
+ struct mddev *mddev2;
spin_lock(&all_mddevs_lock);
list_for_each_entry(mddev2, &all_mddevs, all_mddevs)
mddev->queue->queuedata = mddev;
blk_queue_make_request(mddev->queue, md_make_request);
+ blk_set_stacking_limits(&mddev->queue->limits);
disk = alloc_disk(1 << shift);
if (!disk) {
static void md_safemode_timeout(unsigned long data)
{
- mddev_t *mddev = (mddev_t *) data;
+ struct mddev *mddev = (struct mddev *) data;
if (!atomic_read(&mddev->writes_pending)) {
mddev->safemode = 1;
static int start_dirty_degraded;
-int md_run(mddev_t *mddev)
+int md_run(struct mddev *mddev)
{
int err;
- mdk_rdev_t *rdev;
- struct mdk_personality *pers;
+ struct md_rdev *rdev;
+ struct md_personality *pers;
if (list_empty(&mddev->disks))
/* cannot run an array with no devices.. */
* the only valid external interface is through the md
* device.
*/
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
if (test_bit(Faulty, &rdev->flags))
continue;
sync_blockdev(rdev->bdev);
}
if (mddev->bio_set == NULL)
- mddev->bio_set = bioset_create(BIO_POOL_SIZE, sizeof(mddev));
+ mddev->bio_set = bioset_create(BIO_POOL_SIZE,
+ sizeof(struct mddev *));
spin_lock(&pers_lock);
pers = find_pers(mddev->level, mddev->clevel);
* configuration.
*/
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
- mdk_rdev_t *rdev2;
+ struct md_rdev *rdev2;
int warned = 0;
- list_for_each_entry(rdev, &mddev->disks, same_set)
- list_for_each_entry(rdev2, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev)
+ rdev_for_each(rdev2, mddev) {
if (rdev < rdev2 &&
rdev->bdev->bd_contains ==
rdev2->bdev->bd_contains) {
mddev->in_sync = 1;
smp_wmb();
mddev->ready = 1;
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
if (rdev->raid_disk >= 0)
if (sysfs_link_rdev(mddev, rdev))
/* failure here is OK */;
}
EXPORT_SYMBOL_GPL(md_run);
-static int do_md_run(mddev_t *mddev)
+static int do_md_run(struct mddev *mddev)
{
int err;
return err;
}
-static int restart_array(mddev_t *mddev)
+static int restart_array(struct mddev *mddev)
{
struct gendisk *disk = mddev->gendisk;
spin_unlock(&inode->i_lock);
}
-static void md_clean(mddev_t *mddev)
+static void md_clean(struct mddev *mddev)
{
mddev->array_sectors = 0;
mddev->external_size = 0;
mddev->changed = 0;
mddev->degraded = 0;
mddev->safemode = 0;
+ mddev->merge_check_needed = 0;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.default_offset = 0;
mddev->bitmap_info.chunksize = 0;
mddev->bitmap_info.max_write_behind = 0;
}
-static void __md_stop_writes(mddev_t *mddev)
+static void __md_stop_writes(struct mddev *mddev)
{
if (mddev->sync_thread) {
set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
}
}
-void md_stop_writes(mddev_t *mddev)
+void md_stop_writes(struct mddev *mddev)
{
mddev_lock(mddev);
__md_stop_writes(mddev);
}
EXPORT_SYMBOL_GPL(md_stop_writes);
-void md_stop(mddev_t *mddev)
+void md_stop(struct mddev *mddev)
{
mddev->ready = 0;
mddev->pers->stop(mddev);
}
EXPORT_SYMBOL_GPL(md_stop);
-static int md_set_readonly(mddev_t *mddev, int is_open)
+static int md_set_readonly(struct mddev *mddev, int is_open)
{
int err = 0;
mutex_lock(&mddev->open_mutex);
* 0 - completely stop and dis-assemble array
* 2 - stop but do not disassemble array
*/
-static int do_md_stop(mddev_t * mddev, int mode, int is_open)
+static int do_md_stop(struct mddev * mddev, int mode, int is_open)
{
struct gendisk *disk = mddev->gendisk;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
mutex_lock(&mddev->open_mutex);
if (atomic_read(&mddev->openers) > is_open ||
/* tell userspace to handle 'inactive' */
sysfs_notify_dirent_safe(mddev->sysfs_state);
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
if (rdev->raid_disk >= 0)
sysfs_unlink_rdev(mddev, rdev);
}
#ifndef MODULE
-static void autorun_array(mddev_t *mddev)
+static void autorun_array(struct mddev *mddev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int err;
if (list_empty(&mddev->disks))
printk(KERN_INFO "md: running: ");
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
printk("<%s>", bdevname(rdev->bdev,b));
}
*/
static void autorun_devices(int part)
{
- mdk_rdev_t *rdev0, *rdev, *tmp;
- mddev_t *mddev;
+ struct md_rdev *rdev0, *rdev, *tmp;
+ struct mddev *mddev;
char b[BDEVNAME_SIZE];
printk(KERN_INFO "md: autorun ...\n");
dev_t dev;
LIST_HEAD(candidates);
rdev0 = list_entry(pending_raid_disks.next,
- mdk_rdev_t, same_set);
+ struct md_rdev, same_set);
printk(KERN_INFO "md: considering %s ...\n",
bdevname(rdev0->bdev,b));
return 0;
}
-static int get_array_info(mddev_t * mddev, void __user * arg)
+static int get_array_info(struct mddev * mddev, void __user * arg)
{
mdu_array_info_t info;
int nr,working,insync,failed,spare;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
nr=working=insync=failed=spare=0;
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
nr++;
if (test_bit(Faulty, &rdev->flags))
failed++;
return 0;
}
-static int get_bitmap_file(mddev_t * mddev, void __user * arg)
+static int get_bitmap_file(struct mddev * mddev, void __user * arg)
{
mdu_bitmap_file_t *file = NULL; /* too big for stack allocation */
char *ptr, *buf = NULL;
return err;
}
-static int get_disk_info(mddev_t * mddev, void __user * arg)
+static int get_disk_info(struct mddev * mddev, void __user * arg)
{
mdu_disk_info_t info;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
if (copy_from_user(&info, arg, sizeof(info)))
return -EFAULT;
return 0;
}
-static int add_new_disk(mddev_t * mddev, mdu_disk_info_t *info)
+static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
{
char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE];
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
dev_t dev = MKDEV(info->major,info->minor);
if (info->major != MAJOR(dev) || info->minor != MINOR(dev))
return PTR_ERR(rdev);
}
if (!list_empty(&mddev->disks)) {
- mdk_rdev_t *rdev0 = list_entry(mddev->disks.next,
- mdk_rdev_t, same_set);
+ struct md_rdev *rdev0
+ = list_entry(mddev->disks.next,
+ struct md_rdev, same_set);
err = super_types[mddev->major_version]
.load_super(rdev, rdev0, mddev->minor_version);
if (err < 0) {
return 0;
}
-static int hot_remove_disk(mddev_t * mddev, dev_t dev)
+static int hot_remove_disk(struct mddev * mddev, dev_t dev)
{
char b[BDEVNAME_SIZE];
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
rdev = find_rdev(mddev, dev);
if (!rdev)
return -EBUSY;
}
-static int hot_add_disk(mddev_t * mddev, dev_t dev)
+static int hot_add_disk(struct mddev * mddev, dev_t dev)
{
char b[BDEVNAME_SIZE];
int err;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
if (!mddev->pers)
return -ENODEV;
return err;
}
-static int set_bitmap_file(mddev_t *mddev, int fd)
+static int set_bitmap_file(struct mddev *mddev, int fd)
{
int err;
* The minor and patch _version numbers are also kept in case the
* super_block handler wishes to interpret them.
*/
-static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
+static int set_array_info(struct mddev * mddev, mdu_array_info_t *info)
{
if (info->raid_disks == 0) {
return 0;
}
-void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors)
+void md_set_array_sectors(struct mddev *mddev, sector_t array_sectors)
{
WARN(!mddev_is_locked(mddev), "%s: unlocked mddev!\n", __func__);
}
EXPORT_SYMBOL(md_set_array_sectors);
-static int update_size(mddev_t *mddev, sector_t num_sectors)
+static int update_size(struct mddev *mddev, sector_t num_sectors)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int rv;
int fit = (num_sectors == 0);
* grow, and re-add.
*/
return -EBUSY;
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
sector_t avail = rdev->sectors;
if (fit && (num_sectors == 0 || num_sectors > avail))
return rv;
}
-static int update_raid_disks(mddev_t *mddev, int raid_disks)
+static int update_raid_disks(struct mddev *mddev, int raid_disks)
{
int rv;
/* change the number of raid disks */
* Any differences that cannot be handled will cause an error.
* Normally, only one change can be managed at a time.
*/
-static int update_array_info(mddev_t *mddev, mdu_array_info_t *info)
+static int update_array_info(struct mddev *mddev, mdu_array_info_t *info)
{
int rv = 0;
int cnt = 0;
return rv;
}
-static int set_disk_faulty(mddev_t *mddev, dev_t dev)
+static int set_disk_faulty(struct mddev *mddev, dev_t dev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
if (mddev->pers == NULL)
return -ENODEV;
return -ENODEV;
md_error(mddev, rdev);
+ if (!test_bit(Faulty, &rdev->flags))
+ return -EBUSY;
return 0;
}
*/
static int md_getgeo(struct block_device *bdev, struct hd_geometry *geo)
{
- mddev_t *mddev = bdev->bd_disk->private_data;
+ struct mddev *mddev = bdev->bd_disk->private_data;
geo->heads = 2;
geo->sectors = 4;
{
int err = 0;
void __user *argp = (void __user *)arg;
- mddev_t *mddev = NULL;
+ struct mddev *mddev = NULL;
int ro;
- if (!capable(CAP_SYS_ADMIN))
- return -EACCES;
+ switch (cmd) {
+ case RAID_VERSION:
+ case GET_ARRAY_INFO:
+ case GET_DISK_INFO:
+ break;
+ default:
+ if (!capable(CAP_SYS_ADMIN))
+ return -EACCES;
+ }
/*
* Commands dealing with the RAID driver but not any
* Succeed if we can lock the mddev, which confirms that
* it isn't being stopped right now.
*/
- mddev_t *mddev = mddev_find(bdev->bd_dev);
+ struct mddev *mddev = mddev_find(bdev->bd_dev);
int err;
if (mddev->gendisk != bdev->bd_disk) {
static int md_release(struct gendisk *disk, fmode_t mode)
{
- mddev_t *mddev = disk->private_data;
+ struct mddev *mddev = disk->private_data;
BUG_ON(!mddev);
atomic_dec(&mddev->openers);
static int md_media_changed(struct gendisk *disk)
{
- mddev_t *mddev = disk->private_data;
+ struct mddev *mddev = disk->private_data;
return mddev->changed;
}
static int md_revalidate(struct gendisk *disk)
{
- mddev_t *mddev = disk->private_data;
+ struct mddev *mddev = disk->private_data;
mddev->changed = 0;
return 0;
static int md_thread(void * arg)
{
- mdk_thread_t *thread = arg;
+ struct md_thread *thread = arg;
/*
* md_thread is a 'system-thread'; its priority should be very
return 0;
}
-void md_wakeup_thread(mdk_thread_t *thread)
+void md_wakeup_thread(struct md_thread *thread)
{
if (thread) {
- dprintk("md: waking up MD thread %s.\n", thread->tsk->comm);
+ pr_debug("md: waking up MD thread %s.\n", thread->tsk->comm);
set_bit(THREAD_WAKEUP, &thread->flags);
wake_up(&thread->wqueue);
}
}
-mdk_thread_t *md_register_thread(void (*run) (mddev_t *), mddev_t *mddev,
+struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev *mddev,
const char *name)
{
- mdk_thread_t *thread;
+ struct md_thread *thread;
- thread = kzalloc(sizeof(mdk_thread_t), GFP_KERNEL);
+ thread = kzalloc(sizeof(struct md_thread), GFP_KERNEL);
if (!thread)
return NULL;
return thread;
}
-void md_unregister_thread(mdk_thread_t *thread)
+void md_unregister_thread(struct md_thread **threadp)
{
+ struct md_thread *thread = *threadp;
if (!thread)
return;
- dprintk("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+ pr_debug("interrupting MD-thread pid %d\n", task_pid_nr(thread->tsk));
+ /* Locking ensures that mddev_unlock does not wake_up a
+ * non-existent thread
+ */
+ spin_lock(&pers_lock);
+ *threadp = NULL;
+ spin_unlock(&pers_lock);
kthread_stop(thread->tsk);
kfree(thread);
}
-void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
+void md_error(struct mddev *mddev, struct md_rdev *rdev)
{
if (!mddev) {
MD_BUG();
if (!rdev || test_bit(Faulty, &rdev->flags))
return;
- if (mddev->external)
- set_bit(Blocked, &rdev->flags);
-/*
- dprintk("md_error dev:%s, rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
- mdname(mddev),
- MAJOR(rdev->bdev->bd_dev), MINOR(rdev->bdev->bd_dev),
- __builtin_return_address(0),__builtin_return_address(1),
- __builtin_return_address(2),__builtin_return_address(3));
-*/
- if (!mddev->pers)
- return;
- if (!mddev->pers->error_handler)
+ if (!mddev->pers || !mddev->pers->error_handler)
return;
mddev->pers->error_handler(mddev,rdev);
if (mddev->degraded)
static void status_unused(struct seq_file *seq)
{
int i = 0;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
seq_printf(seq, "unused devices: ");
}
-static void status_resync(struct seq_file *seq, mddev_t * mddev)
+static void status_resync(struct seq_file *seq, struct mddev * mddev)
{
sector_t max_sectors, resync, res;
unsigned long dt, db;
{
struct list_head *tmp;
loff_t l = *pos;
- mddev_t *mddev;
+ struct mddev *mddev;
if (l >= 0x10000)
return NULL;
spin_lock(&all_mddevs_lock);
list_for_each(tmp,&all_mddevs)
if (!l--) {
- mddev = list_entry(tmp, mddev_t, all_mddevs);
+ mddev = list_entry(tmp, struct mddev, all_mddevs);
mddev_get(mddev);
spin_unlock(&all_mddevs_lock);
return mddev;
static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
struct list_head *tmp;
- mddev_t *next_mddev, *mddev = v;
+ struct mddev *next_mddev, *mddev = v;
++*pos;
if (v == (void*)2)
else
tmp = mddev->all_mddevs.next;
if (tmp != &all_mddevs)
- next_mddev = mddev_get(list_entry(tmp,mddev_t,all_mddevs));
+ next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs));
else {
next_mddev = (void*)2;
*pos = 0x10000;
static void md_seq_stop(struct seq_file *seq, void *v)
{
- mddev_t *mddev = v;
+ struct mddev *mddev = v;
if (mddev && v != (void*)1 && v != (void*)2)
mddev_put(mddev);
}
-struct mdstat_info {
- int event;
-};
-
static int md_seq_show(struct seq_file *seq, void *v)
{
- mddev_t *mddev = v;
+ struct mddev *mddev = v;
sector_t sectors;
- mdk_rdev_t *rdev;
- struct mdstat_info *mi = seq->private;
- struct bitmap *bitmap;
+ struct md_rdev *rdev;
if (v == (void*)1) {
- struct mdk_personality *pers;
+ struct md_personality *pers;
seq_printf(seq, "Personalities : ");
spin_lock(&pers_lock);
list_for_each_entry(pers, &pers_list, list)
spin_unlock(&pers_lock);
seq_printf(seq, "\n");
- mi->event = atomic_read(&md_event_count);
+ seq->poll_event = atomic_read(&md_event_count);
return 0;
}
if (v == (void*)2) {
}
sectors = 0;
- list_for_each_entry(rdev, &mddev->disks, same_set) {
+ rdev_for_each(rdev, mddev) {
char b[BDEVNAME_SIZE];
seq_printf(seq, " %s[%d]",
bdevname(rdev->bdev,b), rdev->desc_nr);
if (test_bit(Faulty, &rdev->flags)) {
seq_printf(seq, "(F)");
continue;
- } else if (rdev->raid_disk < 0)
+ }
+ if (rdev->raid_disk < 0)
seq_printf(seq, "(S)"); /* spare */
+ if (test_bit(Replacement, &rdev->flags))
+ seq_printf(seq, "(R)");
sectors += rdev->sectors;
}
} else
seq_printf(seq, "\n ");
- if ((bitmap = mddev->bitmap)) {
- unsigned long chunk_kb;
- unsigned long flags;
- spin_lock_irqsave(&bitmap->lock, flags);
- chunk_kb = mddev->bitmap_info.chunksize >> 10;
- seq_printf(seq, "bitmap: %lu/%lu pages [%luKB], "
- "%lu%s chunk",
- bitmap->pages - bitmap->missing_pages,
- bitmap->pages,
- (bitmap->pages - bitmap->missing_pages)
- << (PAGE_SHIFT - 10),
- chunk_kb ? chunk_kb : mddev->bitmap_info.chunksize,
- chunk_kb ? "KB" : "B");
- if (bitmap->file) {
- seq_printf(seq, ", file: ");
- seq_path(seq, &bitmap->file->f_path, " \t\n");
- }
-
- seq_printf(seq, "\n");
- spin_unlock_irqrestore(&bitmap->lock, flags);
- }
+ bitmap_status(seq, mddev->bitmap);
seq_printf(seq, "\n");
}
static int md_seq_open(struct inode *inode, struct file *file)
{
+ struct seq_file *seq;
int error;
- struct mdstat_info *mi = kmalloc(sizeof(*mi), GFP_KERNEL);
- if (mi == NULL)
- return -ENOMEM;
error = seq_open(file, &md_seq_ops);
if (error)
- kfree(mi);
- else {
- struct seq_file *p = file->private_data;
- p->private = mi;
- mi->event = atomic_read(&md_event_count);
- }
+ return error;
+
+ seq = file->private_data;
+ seq->poll_event = atomic_read(&md_event_count);
return error;
}
static unsigned int mdstat_poll(struct file *filp, poll_table *wait)
{
- struct seq_file *m = filp->private_data;
- struct mdstat_info *mi = m->private;
+ struct seq_file *seq = filp->private_data;
int mask;
poll_wait(filp, &md_event_waiters, wait);
/* always allow read */
mask = POLLIN | POLLRDNORM;
- if (mi->event != atomic_read(&md_event_count))
+ if (seq->poll_event != atomic_read(&md_event_count))
mask |= POLLERR | POLLPRI;
return mask;
}
.poll = mdstat_poll,
};
-int register_md_personality(struct mdk_personality *p)
+int register_md_personality(struct md_personality *p)
{
spin_lock(&pers_lock);
list_add_tail(&p->list, &pers_list);
return 0;
}
-int unregister_md_personality(struct mdk_personality *p)
+int unregister_md_personality(struct md_personality *p)
{
printk(KERN_INFO "md: %s personality unregistered\n", p->name);
spin_lock(&pers_lock);
return 0;
}
-static int is_mddev_idle(mddev_t *mddev, int init)
+static int is_mddev_idle(struct mddev *mddev, int init)
{
- mdk_rdev_t * rdev;
+ struct md_rdev * rdev;
int idle;
int curr_events;
return idle;
}
-void md_done_sync(mddev_t *mddev, int blocks, int ok)
+void md_done_sync(struct mddev *mddev, int blocks, int ok)
{
/* another "blocks" (512-byte) blocks have been synced */
atomic_sub(blocks, &mddev->recovery_active);
* in superblock) before writing, schedule a superblock update
* and wait for it to complete.
*/
-void md_write_start(mddev_t *mddev, struct bio *bi)
+void md_write_start(struct mddev *mddev, struct bio *bi)
{
int did_change = 0;
if (bio_data_dir(bi) != WRITE)
!test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
-void md_write_end(mddev_t *mddev)
+void md_write_end(struct mddev *mddev)
{
if (atomic_dec_and_test(&mddev->writes_pending)) {
if (mddev->safemode == 2)
* In the ->external case MD_CHANGE_CLEAN can not be cleared until mddev->lock
* is dropped, so return -EAGAIN after notifying userspace.
*/
-int md_allow_write(mddev_t *mddev)
+int md_allow_write(struct mddev *mddev)
{
if (!mddev->pers)
return 0;
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
-void md_do_sync(mddev_t *mddev)
+void md_do_sync(struct mddev *mddev)
{
- mddev_t *mddev2;
+ struct mddev *mddev2;
unsigned int currspeed = 0,
window;
sector_t max_sectors,j, io_sectors;
struct list_head *tmp;
sector_t last_check;
int skipped = 0;
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
char *desc;
/* just in case thread restarts... */
max_sectors = mddev->dev_sectors;
j = MaxSector;
rcu_read_lock();
- list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
!test_bit(In_sync, &rdev->flags) &&
atomic_add(sectors, &mddev->recovery_active);
}
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
+ break;
+
j += sectors;
if (j>1) mddev->curr_resync = j;
mddev->curr_mark_cnt = io_sectors;
if (last_check == 0)
- /* this is the earliers that rebuilt will be
+ /* this is the earliest that rebuild will be
* visible in /proc/mdstat
*/
md_new_event(mddev);
continue;
last_check = io_sectors;
-
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
- break;
-
repeat:
if (time_after_eq(jiffies, mark[last_mark] + SYNC_MARK_STEP )) {
/* step marks */
printk(KERN_INFO
"md: checkpointing %s of %s.\n",
desc, mdname(mddev));
- mddev->recovery_cp = mddev->curr_resync;
+ mddev->recovery_cp =
+ mddev->curr_resync_completed;
}
} else
mddev->recovery_cp = MaxSector;
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery))
mddev->curr_resync = MaxSector;
rcu_read_lock();
- list_for_each_entry_rcu(rdev, &mddev->disks, same_set)
+ rdev_for_each_rcu(rdev, mddev)
if (rdev->raid_disk >= 0 &&
mddev->delta_disks >= 0 &&
!test_bit(Faulty, &rdev->flags) &&
rcu_read_unlock();
}
}
+ skip:
set_bit(MD_CHANGE_DEVS, &mddev->flags);
- skip:
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* We completed so min/max setting can be forgotten if used. */
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
}
EXPORT_SYMBOL_GPL(md_do_sync);
-static int remove_and_add_spares(mddev_t *mddev)
+static int remove_and_add_spares(struct mddev *mddev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
int spares = 0;
+ int removed = 0;
mddev->curr_resync_completed = 0;
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
(test_bit(Faulty, &rdev->flags) ||
! test_bit(In_sync, &rdev->flags)) &&
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
- mddev, rdev->raid_disk)==0) {
+ mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
+ removed++;
}
}
+ if (removed)
+ sysfs_notify(&mddev->kobj, NULL,
+ "degraded");
- if (mddev->degraded) {
- list_for_each_entry(rdev, &mddev->disks, same_set) {
- if (rdev->raid_disk >= 0 &&
- !test_bit(In_sync, &rdev->flags) &&
- !test_bit(Faulty, &rdev->flags) &&
- !test_bit(Blocked, &rdev->flags))
+
+ rdev_for_each(rdev, mddev) {
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(In_sync, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags))
+ spares++;
+ if (rdev->raid_disk < 0
+ && !test_bit(Faulty, &rdev->flags)) {
+ rdev->recovery_offset = 0;
+ if (mddev->pers->
+ hot_add_disk(mddev, rdev) == 0) {
+ if (sysfs_link_rdev(mddev, rdev))
+ /* failure here is OK */;
spares++;
- if (rdev->raid_disk < 0
- && !test_bit(Faulty, &rdev->flags)) {
- rdev->recovery_offset = 0;
- if (mddev->pers->
- hot_add_disk(mddev, rdev) == 0) {
- if (sysfs_link_rdev(mddev, rdev))
- /* failure here is OK */;
- spares++;
- md_new_event(mddev);
- set_bit(MD_CHANGE_DEVS, &mddev->flags);
- } else
- break;
+ md_new_event(mddev);
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
}
}
}
return spares;
}
-static void reap_sync_thread(mddev_t *mddev)
+static void reap_sync_thread(struct mddev *mddev)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
/* resync has finished, collect result */
- md_unregister_thread(mddev->sync_thread);
- mddev->sync_thread = NULL;
+ md_unregister_thread(&mddev->sync_thread);
if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
/* success...*/
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
mddev->pers->finish_reshape)
mddev->pers->finish_reshape(mddev);
- md_update_sb(mddev, 1);
- /* if array is no-longer degraded, then any saved_raid_disk
- * information must be scrapped
+ /* If array is no-longer degraded, then any saved_raid_disk
+ * information must be scrapped. Also if any device is now
+ * In_sync we must scrap its saved_raid_disk information so
+ * that the superblock for an incrementally recovered device
+ * gets written out.
*/
- if (!mddev->degraded)
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ rdev_for_each(rdev, mddev)
+ if (!mddev->degraded ||
+ test_bit(In_sync, &rdev->flags))
rdev->saved_raid_disk = -1;
+ md_update_sb(mddev, 1);
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
sysfs_notify_dirent_safe(mddev->sysfs_action);
md_new_event(mddev);
+ if (mddev->event_work.func)
+ queue_work(md_misc_wq, &mddev->event_work);
}
/*
* 5/ If array is degraded, try to add spare devices
* 6/ If array has spares or is not in-sync, start a resync thread.
*/
-void md_check_recovery(mddev_t *mddev)
+void md_check_recovery(struct mddev *mddev)
{
if (mddev->suspended)
return;
/* Only thing we do on a ro array is remove
* failed devices.
*/
- mdk_rdev_t *rdev;
- list_for_each_entry(rdev, &mddev->disks, same_set)
+ struct md_rdev *rdev;
+ rdev_for_each(rdev, mddev)
if (rdev->raid_disk >= 0 &&
!test_bit(Blocked, &rdev->flags) &&
test_bit(Faulty, &rdev->flags) &&
atomic_read(&rdev->nr_pending)==0) {
if (mddev->pers->hot_remove_disk(
- mddev, rdev->raid_disk)==0) {
+ mddev, rdev) == 0) {
sysfs_unlink_rdev(mddev, rdev);
rdev->raid_disk = -1;
}
* any transients in the value of "sync_action".
*/
set_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
- clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
/* Clear some bits that don't mean anything, but
* might be left set
*/
clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
- if (test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
+ if (!test_and_clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_FROZEN, &mddev->recovery))
goto unlock;
/* no recovery is running.
* remove any failed drives, then
}
}
-void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev)
{
sysfs_notify_dirent_safe(rdev->sysfs_state);
wait_event_timeout(rdev->blocked_wait,
- !test_bit(Blocked, &rdev->flags),
+ !test_bit(Blocked, &rdev->flags) &&
+ !test_bit(BlockedBadBlocks, &rdev->flags),
msecs_to_jiffies(5000));
rdev_dec_pending(rdev, mddev);
}
EXPORT_SYMBOL(md_wait_for_blocked_rdev);
-static int md_notify_reboot(struct notifier_block *this,
- unsigned long code, void *x)
+
+/* Bad block management.
+ * We can record which blocks on each device are 'bad' and so just
+ * fail those blocks, or that stripe, rather than the whole device.
+ * Entries in the bad-block table are 64 bits wide. This comprises:
+ * Length of bad-range, in sectors: 0-511 for lengths 1-512
+ * Start of bad-range, sector offset, 54 bits (allows 8 exbibytes)
+ * A 'shift' can be set so that larger blocks are tracked and
+ * consequently larger devices can be covered.
+ * 'Acknowledged' flag - 1 bit - the most significant bit.
+ *
+ * Locking of the bad-block table uses a seqlock so md_is_badblock
+ * might need to retry if it is very unlucky.
+ * We will sometimes want to check for bad blocks in a bi_end_io function,
+ * so we use the write_seqlock_irq variant.
+ *
+ * When looking for a bad block we specify a range and want to
+ * know if any block in the range is bad. So we binary-search
+ * to the last range that starts at-or-before the given endpoint,
+ * (or "before the sector after the target range")
+ * then see if it ends after the given start.
+ * We return
+ * 0 if there are no known bad blocks in the range
+ * 1 if there are known bad blocks which are all acknowledged
+ * -1 if there are bad blocks which have not yet been acknowledged in metadata,
+ * plus the start/length of the first bad section we overlap.
+ */
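+
+/* For reference, a sketch of the 64-bit entry encoding described above.
+ * It corresponds to the BB_* helpers declared in md.h; they are
+ * reproduced here purely as an illustration of the layout:
+ *
+ *	#define BB_LEN_MASK	(0x00000000000001FFULL)
+ *	#define BB_OFFSET_MASK	(0x7FFFFFFFFFFFFE00ULL)
+ *	#define BB_ACK_MASK	(0x8000000000000000ULL)
+ *	#define BB_MAX_LEN	512
+ *	#define BB_OFFSET(x)	(((x) & BB_OFFSET_MASK) >> 9)
+ *	#define BB_LEN(x)	(((x) & BB_LEN_MASK) + 1)
+ *	#define BB_ACK(x)	(!!((x) & BB_ACK_MASK))
+ *	#define BB_MAKE(a, l, ack) (((a)<<9) | ((l)-1) | ((u64)(!!(ack)) << 63))
+ */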
+int md_is_badblock(struct badblocks *bb, sector_t s, int sectors,
+ sector_t *first_bad, int *bad_sectors)
{
- struct list_head *tmp;
- mddev_t *mddev;
+ int hi;
+ int lo = 0;
+ u64 *p = bb->page;
+ int rv = 0;
+ sector_t target = s + sectors;
+ unsigned seq;
+
+ if (bb->shift > 0) {
+ /* round the start down, and the end up */
+ s >>= bb->shift;
+ target += (1<<bb->shift) - 1;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+ /* 'target' is now the first block after the bad range */
- if ((code == SYS_DOWN) || (code == SYS_HALT) || (code == SYS_POWER_OFF)) {
+retry:
+ seq = read_seqbegin(&bb->lock);
- printk(KERN_INFO "md: stopping all md devices.\n");
+ hi = bb->count;
- for_each_mddev(mddev, tmp)
- if (mddev_trylock(mddev)) {
- /* Force a switch to readonly even array
- * appears to still be in use. Hence
- * the '100'.
+ /* Binary search between lo and hi for 'target'
+ * i.e. for the last range that starts before 'target'
+ */
+ /* INVARIANT: ranges before 'lo' and at-or-after 'hi'
+ * are known not to be the last range before target.
+ * VARIANT: hi-lo is the number of possible
+ * ranges, and decreases until it reaches 1
+ */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a < target)
+ /* This could still be the one, earlier ranges
+ * could not. */
+ lo = mid;
+ else
+ /* This and later ranges are definitely out. */
+ hi = mid;
+ }
+ /* 'lo' might be the last that started before target, but 'hi' isn't */
+ if (hi > lo) {
+ /* need to check all ranges that end after 's' to see if
+ * any are unacknowledged.
+ */
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ if (BB_OFFSET(p[lo]) < target) {
+ /* starts before the end, and finishes after
+ * the start, so they must overlap
*/
- md_set_readonly(mddev, 100);
- mddev_unlock(mddev);
+ if (rv != -1 && BB_ACK(p[lo]))
+ rv = 1;
+ else
+ rv = -1;
+ *first_bad = BB_OFFSET(p[lo]);
+ *bad_sectors = BB_LEN(p[lo]);
}
- /*
- * certain more exotic SCSI devices are known to be
- * volatile wrt too early system reboots. While the
- * right place to handle this issue is the given
- * driver, we do want to have a safe RAID driver ...
+ lo--;
+ }
+ }
+
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return rv;
+}
+EXPORT_SYMBOL_GPL(md_is_badblock);
+
+/*
+ * Add a range of bad blocks to the table.
+ * This might extend the table, or might contract it
+ * if two adjacent ranges can be merged.
+ * We binary-search to find the 'insertion' point, then
+ * decide how best to handle it.
+ */
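+/* A worked example of the merge logic below (illustrative only, with
+ * bb->shift == 0): given an existing acknowledged entry covering
+ * sectors 100..109, setting sectors 105..119 extends that entry to
+ * cover 100..119, and it stays acknowledged only if the new request
+ * is acknowledged too. A request overlapping no existing entry is
+ * inserted as a fresh range, split into BB_MAX_LEN-sized pieces if
+ * necessary.
+ */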
+static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors,
+ int acknowledged)
+{
+ u64 *p;
+ int lo, hi;
+ int rv = 1;
+
+ if (bb->shift < 0)
+ /* badblocks are disabled */
+ return 0;
+
+ if (bb->shift) {
+ /* round the start down, and the end up */
+ sector_t next = s + sectors;
+ s >>= bb->shift;
+ next += (1<<bb->shift) - 1;
+ next >>= bb->shift;
+ sectors = next - s;
+ }
+
+ write_seqlock_irq(&bb->lock);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts at-or-before 's' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a <= s)
+ lo = mid;
+ else
+ hi = mid;
+ }
+ if (hi > lo && BB_OFFSET(p[lo]) > s)
+ hi = lo;
+
+ if (hi > lo) {
+ /* we found a range that might merge with the start
+ * of our new range
*/
- mdelay(1000*1);
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t e = a + BB_LEN(p[lo]);
+ int ack = BB_ACK(p[lo]);
+ if (e >= s) {
+ /* Yes, we can merge with a previous range */
+ if (s == a && s + sectors >= e)
+ /* new range covers old */
+ ack = acknowledged;
+ else
+ ack = ack && acknowledged;
+
+ if (e < s + sectors)
+ e = s + sectors;
+ if (e - a <= BB_MAX_LEN) {
+ p[lo] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ /* does not all fit in one range,
+ * make p[lo] maximal
+ */
+ if (BB_LEN(p[lo]) != BB_MAX_LEN)
+ p[lo] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ }
+ }
+ if (sectors && hi < bb->count) {
+ /* 'hi' points to the first range that starts after 's'.
+ * Maybe we can merge with the start of that range */
+ sector_t a = BB_OFFSET(p[hi]);
+ sector_t e = a + BB_LEN(p[hi]);
+ int ack = BB_ACK(p[hi]);
+ if (a <= s + sectors) {
+ /* merging is possible */
+ if (e <= s + sectors) {
+ /* full overlap */
+ e = s + sectors;
+ ack = acknowledged;
+ } else
+ ack = ack && acknowledged;
+
+ a = s;
+ if (e - a <= BB_MAX_LEN) {
+ p[hi] = BB_MAKE(a, e-a, ack);
+ s = e;
+ } else {
+ p[hi] = BB_MAKE(a, BB_MAX_LEN, ack);
+ s = a + BB_MAX_LEN;
+ }
+ sectors = e - s;
+ lo = hi;
+ hi++;
+ }
+ }
+ if (sectors == 0 && hi < bb->count) {
+ /* we might be able to combine lo and hi */
+ /* Note: 's' is at the end of 'lo' */
+ sector_t a = BB_OFFSET(p[hi]);
+ int lolen = BB_LEN(p[lo]);
+ int hilen = BB_LEN(p[hi]);
+ int newlen = lolen + hilen - (s - a);
+ if (s >= a && newlen < BB_MAX_LEN) {
+ /* yes, we can combine them */
+ int ack = BB_ACK(p[lo]) && BB_ACK(p[hi]);
+ p[lo] = BB_MAKE(BB_OFFSET(p[lo]), newlen, ack);
+ memmove(p + hi, p + hi + 1,
+ (bb->count - hi - 1) * 8);
+ bb->count--;
+ }
+ }
+ while (sectors) {
+ /* didn't merge (or didn't merge it all).
+ * Need to add a range just before 'hi' */
+ if (bb->count >= MD_MAX_BADBLOCKS) {
+ /* No room for more */
+ rv = 0;
+ break;
+ } else {
+ int this_sectors = sectors;
+ memmove(p + hi + 1, p + hi,
+ (bb->count - hi) * 8);
+ bb->count++;
+
+ if (this_sectors > BB_MAX_LEN)
+ this_sectors = BB_MAX_LEN;
+ p[hi] = BB_MAKE(s, this_sectors, acknowledged);
+ sectors -= this_sectors;
+ s += this_sectors;
+ }
+ }
+
+ bb->changed = 1;
+ if (!acknowledged)
+ bb->unacked_exist = 1;
+ write_sequnlock_irq(&bb->lock);
+
+ return rv;
+}
+
+int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
+ int acknowledged)
+{
+ int rv = md_set_badblocks(&rdev->badblocks,
+ s + rdev->data_offset, sectors, acknowledged);
+ if (rv) {
+ /* Make sure they get written out promptly */
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
+ md_wakeup_thread(rdev->mddev->thread);
+ }
+ return rv;
+}
+EXPORT_SYMBOL_GPL(rdev_set_badblocks);
+
+/*
+ * Remove a range of bad blocks from the table.
+ * This may involve extending the table if we split a region,
+ * but it must not fail. So if the table becomes full, we just
+ * drop the remove request.
+ */
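+/* Illustrative example (bb->shift == 0): clearing sectors 105..109
+ * from an entry covering 100..119 leaves two entries, 100..104 and
+ * 110..119, growing the table by one. If the table is already full
+ * the split cannot be made and the remove request is dropped.
+ */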
+static int md_clear_badblocks(struct badblocks *bb, sector_t s, int sectors)
+{
+ u64 *p;
+ int lo, hi;
+ sector_t target = s + sectors;
+ int rv = 0;
+
+ if (bb->shift > 0) {
+ /* When clearing we round the start up and the end down.
+ * This should not matter as the shift should align with
+ * the block size and no rounding should ever be needed.
+ * However it is better to think a block is bad when it
+ * isn't than to think a block is not bad when it is.
+ */
+ s += (1<<bb->shift) - 1;
+ s >>= bb->shift;
+ target >>= bb->shift;
+ sectors = target - s;
+ }
+
+ write_seqlock_irq(&bb->lock);
+
+ p = bb->page;
+ lo = 0;
+ hi = bb->count;
+ /* Find the last range that starts before 'target' */
+ while (hi - lo > 1) {
+ int mid = (lo + hi) / 2;
+ sector_t a = BB_OFFSET(p[mid]);
+ if (a < target)
+ lo = mid;
+ else
+ hi = mid;
}
+ if (hi > lo) {
+ /* p[lo] is the last range that could overlap the
+ * current range. Earlier ranges could also overlap,
+ * but only this one can overlap the end of the range.
+ */
+ if (BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > target) {
+ /* Partial overlap, leave the tail of this range */
+ int ack = BB_ACK(p[lo]);
+ sector_t a = BB_OFFSET(p[lo]);
+ sector_t end = a + BB_LEN(p[lo]);
+
+ if (a < s) {
+ /* we need to split this range */
+ if (bb->count >= MD_MAX_BADBLOCKS) {
+ rv = 0;
+ goto out;
+ }
+ memmove(p+lo+1, p+lo, (bb->count - lo) * 8);
+ bb->count++;
+ p[lo] = BB_MAKE(a, s-a, ack);
+ lo++;
+ }
+ p[lo] = BB_MAKE(target, end - target, ack);
+ /* there is no longer an overlap */
+ hi = lo;
+ lo--;
+ }
+ while (lo >= 0 &&
+ BB_OFFSET(p[lo]) + BB_LEN(p[lo]) > s) {
+ /* This range does overlap */
+ if (BB_OFFSET(p[lo]) < s) {
+ /* Keep the early parts of this range. */
+ int ack = BB_ACK(p[lo]);
+ sector_t start = BB_OFFSET(p[lo]);
+ p[lo] = BB_MAKE(start, s - start, ack);
+ /* now p[lo] no longer overlaps, so stop */
+ break;
+ }
+ lo--;
+ }
+ /* 'lo' is strictly before, 'hi' is strictly after,
+ * anything between needs to be discarded
+ */
+ if (hi - lo > 1) {
+ memmove(p+lo+1, p+hi, (bb->count - hi) * 8);
+ bb->count -= (hi - lo - 1);
+ }
+ }
+
+ bb->changed = 1;
+out:
+ write_sequnlock_irq(&bb->lock);
+ return rv;
+}
+
+int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors)
+{
+ return md_clear_badblocks(&rdev->badblocks,
+ s + rdev->data_offset,
+ sectors);
+}
+EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
+
+/*
+ * Acknowledge all bad blocks in a list.
+ * This only succeeds if ->changed is clear. It is used by
+ * in-kernel metadata updates.
+ */
+void md_ack_all_badblocks(struct badblocks *bb)
+{
+ if (bb->page == NULL || bb->changed)
+ /* no point even trying */
+ return;
+ write_seqlock_irq(&bb->lock);
+
+ if (bb->changed == 0 && bb->unacked_exist) {
+ u64 *p = bb->page;
+ int i;
+ for (i = 0; i < bb->count ; i++) {
+ if (!BB_ACK(p[i])) {
+ sector_t start = BB_OFFSET(p[i]);
+ int len = BB_LEN(p[i]);
+ p[i] = BB_MAKE(start, len, 1);
+ }
+ }
+ bb->unacked_exist = 0;
+ }
+ write_sequnlock_irq(&bb->lock);
+}
+EXPORT_SYMBOL_GPL(md_ack_all_badblocks);
+
+/* sysfs access to bad-blocks list.
+ * We present two files.
+ * 'bad-blocks' lists sector numbers and lengths of ranges that
+ * are recorded as bad. The list is truncated to fit within
+ * the one-page limit of sysfs.
+ * Writing "sector length" to this file adds an acknowledged
+ * bad block list.
+ * 'unacknowledged-bad-blocks' lists bad blocks that have not yet
+ * been acknowledged. Writing to this file adds bad blocks
+ * without acknowledging them. This is largely for testing.
+ */
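+/* A minimal userspace sketch of driving this interface. The exact
+ * sysfs path and file name are assumptions here (per-rdev attributes
+ * live under the array's md directory, e.g.
+ * /sys/block/md0/md/dev-sda1/), not something this patch defines:
+ *
+ *	#include <stdio.h>
+ *
+ *	int main(void)
+ *	{
+ *		// hypothetical path; adjust for the array and member device
+ *		FILE *f = fopen("/sys/block/md0/md/dev-sda1/bad_blocks", "w");
+ *		if (!f)
+ *			return 1;
+ *		// record an acknowledged bad range: 8 sectors from 1024
+ *		fprintf(f, "1024 8\n");
+ *		return fclose(f) ? 1 : 0;
+ *	}
+ */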
+
+static ssize_t
+badblocks_show(struct badblocks *bb, char *page, int unack)
+{
+ size_t len;
+ int i;
+ u64 *p = bb->page;
+ unsigned seq;
+
+ if (bb->shift < 0)
+ return 0;
+
+retry:
+ seq = read_seqbegin(&bb->lock);
+
+ len = 0;
+ i = 0;
+
+ while (len < PAGE_SIZE && i < bb->count) {
+ sector_t s = BB_OFFSET(p[i]);
+ unsigned int length = BB_LEN(p[i]);
+ int ack = BB_ACK(p[i]);
+ i++;
+
+ if (unack && ack)
+ continue;
+
+ len += snprintf(page+len, PAGE_SIZE-len, "%llu %u\n",
+ (unsigned long long)s << bb->shift,
+ length << bb->shift);
+ }
+ if (unack && len == 0)
+ bb->unacked_exist = 0;
+
+ if (read_seqretry(&bb->lock, seq))
+ goto retry;
+
+ return len;
+}
+
+#define DO_DEBUG 1
+
+static ssize_t
+badblocks_store(struct badblocks *bb, const char *page, size_t len, int unack)
+{
+ unsigned long long sector;
+ int length;
+ char newline;
+#ifdef DO_DEBUG
+ /* Allow clearing via sysfs *only* for testing/debugging.
+ * Normally only a successful write may clear a badblock
+ */
+ int clear = 0;
+ if (page[0] == '-') {
+ clear = 1;
+ page++;
+ }
+#endif /* DO_DEBUG */
+
+ switch (sscanf(page, "%llu %d%c", &sector, &length, &newline)) {
+ case 3:
+ if (newline != '\n')
+ return -EINVAL;
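+ /* fall through */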
+ case 2:
+ if (length <= 0)
+ return -EINVAL;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+#ifdef DO_DEBUG
+ if (clear) {
+ md_clear_badblocks(bb, sector, length);
+ return len;
+ }
+#endif /* DO_DEBUG */
+ if (md_set_badblocks(bb, sector, length, !unack))
+ return len;
+ else
+ return -ENOSPC;
+}
+
+static int md_notify_reboot(struct notifier_block *this,
+ unsigned long code, void *x)
+{
+ struct list_head *tmp;
+ struct mddev *mddev;
+ int need_delay = 0;
+
+ for_each_mddev(mddev, tmp) {
+ if (mddev_trylock(mddev)) {
+ if (mddev->pers)
+ __md_stop_writes(mddev);
+ mddev->safemode = 2;
+ mddev_unlock(mddev);
+ }
+ need_delay = 1;
+ }
+ /*
+ * certain more exotic SCSI devices are known to be
+ * volatile wrt too early system reboots. While the
+ * right place to handle this issue is the given
+ * driver, we do want to have a safe RAID driver ...
+ */
+ if (need_delay)
+ mdelay(1000*1);
+
return NOTIFY_DONE;
}
static void md_geninit(void)
{
- dprintk("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
+ pr_debug("md: sizeof(mdp_super_t) = %d\n", (int)sizeof(mdp_super_t));
proc_create("mdstat", S_IRUGO, NULL, &md_seq_fops);
}
static void autostart_arrays(int part)
{
- mdk_rdev_t *rdev;
+ struct md_rdev *rdev;
struct detected_devices_node *node_detected_dev;
dev_t dev;
int i_scanned, i_passed;
static __exit void md_exit(void)
{
- mddev_t *mddev;
+ struct mddev *mddev;
struct list_head *tmp;
blk_unregister_region(MKDEV(MD_MAJOR,0), 1U << MINORBITS);