X-Git-Url: http://git.alex.org.uk diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c index 1c509d5..a4f58e9 100644 --- a/fs/ext4/move_extent.c +++ b/fs/ext4/move_extent.c @@ -15,6 +15,7 @@ #include #include +#include #include "ext4_jbd2.h" #include "ext4_extents.h" #include "ext4.h" @@ -77,12 +78,14 @@ static int mext_next_extent(struct inode *inode, struct ext4_ext_path *path, struct ext4_extent **extent) { + struct ext4_extent_header *eh; int ppos, leaf_ppos = path->p_depth; ppos = leaf_ppos; if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { /* leaf block */ *extent = ++path[ppos].p_ext; + path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext); return 0; } @@ -93,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, /* index block */ path[ppos].p_idx++; - path[ppos].p_block = idx_pblock(path[ppos].p_idx); + path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx); if (path[ppos+1].p_bh) brelse(path[ppos+1].p_bh); path[ppos+1].p_bh = @@ -108,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, path[cur_ppos].p_idx = EXT_FIRST_INDEX(path[cur_ppos].p_hdr); path[cur_ppos].p_block = - idx_pblock(path[cur_ppos].p_idx); + ext4_idx_pblock(path[cur_ppos].p_idx); if (path[cur_ppos+1].p_bh) brelse(path[cur_ppos+1].p_bh); path[cur_ppos+1].p_bh = sb_bread(inode->i_sb, @@ -119,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, ext_block_hdr(path[cur_ppos+1].p_bh); } + path[leaf_ppos].p_ext = *extent = NULL; + + eh = path[leaf_ppos].p_hdr; + if (le16_to_cpu(eh->eh_entries) == 0) + /* empty leaf is found */ + return -ENODATA; + /* leaf block */ path[leaf_ppos].p_ext = *extent = EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); + path[leaf_ppos].p_block = + ext4_ext_pblock(path[leaf_ppos].p_ext); return 0; } } @@ -136,17 +148,17 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path, */ static int mext_check_null_inode(struct inode *inode1, struct inode *inode2, - const char *function) + const char *function, unsigned int line) { int ret = 0; if (inode1 == NULL) { - ext4_error(inode2->i_sb, function, + __ext4_error(inode2->i_sb, function, line, "Both inodes should not be NULL: " "inode1 NULL inode2 %lu", inode2->i_ino); ret = -EIO; } else if (inode2 == NULL) { - ext4_error(inode1->i_sb, function, + __ext4_error(inode1->i_sb, function, line, "Both inodes should not be NULL: " "inode1 %lu inode2 NULL", inode1->i_ino); ret = -EIO; @@ -155,40 +167,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2, } /** - * mext_double_down_read - Acquire two inodes' read semaphore + * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem * * @orig_inode: original inode structure * @donor_inode: donor inode structure - * Acquire read semaphore of the two inodes (orig and donor) by i_ino order. + * Acquire write lock of i_data_sem of the two inodes (orig and donor) by + * i_ino order. */ static void -mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode) -{ - struct inode *first = orig_inode, *second = donor_inode; - - /* - * Use the inode number to provide the stable locking order instead - * of its address, because the C language doesn't guarantee you can - * compare pointers that don't come from the same array. - */ - if (donor_inode->i_ino < orig_inode->i_ino) { - first = donor_inode; - second = orig_inode; - } - - down_read(&EXT4_I(first)->i_data_sem); - down_read(&EXT4_I(second)->i_data_sem); -} - -/** - * mext_double_down_write - Acquire two inodes' write semaphore - * - * @orig_inode: original inode structure - * @donor_inode: donor inode structure - * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. - */ -static void -mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) +double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { struct inode *first = orig_inode, *second = donor_inode; @@ -203,32 +190,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) } down_write(&EXT4_I(first)->i_data_sem); - down_write(&EXT4_I(second)->i_data_sem); -} - -/** - * mext_double_up_read - Release two inodes' read semaphore - * - * @orig_inode: original inode structure to be released its lock first - * @donor_inode: donor inode structure to be released its lock second - * Release read semaphore of two inodes (orig and donor). - */ -static void -mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) -{ - up_read(&EXT4_I(orig_inode)->i_data_sem); - up_read(&EXT4_I(donor_inode)->i_data_sem); + down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING); } /** - * mext_double_up_write - Release two inodes' write semaphore + * double_up_write_data_sem - Release two inodes' write lock of i_data_sem * * @orig_inode: original inode structure to be released its lock first * @donor_inode: donor inode structure to be released its lock second - * Release write semaphore of two inodes (orig and donor). + * Release write lock of i_data_sem of two inodes (orig and donor). */ static void -mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode) +double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode) { up_write(&EXT4_I(orig_inode)->i_data_sem); up_write(&EXT4_I(donor_inode)->i_data_sem); @@ -276,10 +249,11 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, */ o_end->ee_block = end_ext->ee_block; o_end->ee_len = end_ext->ee_len; - ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); } o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (start_ext->ee_len && new_ext->ee_len && @@ -290,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, * orig |------------------------------| */ o_start->ee_len = start_ext->ee_len; + eblock = le32_to_cpu(start_ext->ee_block); new_flag = 1; } else if (!start_ext->ee_len && new_ext->ee_len && @@ -301,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, */ o_end->ee_block = end_ext->ee_block; o_end->ee_len = end_ext->ee_len; - ext4_ext_store_pblock(o_end, ext_pblock(end_ext)); + ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext)); /* * Set 0 to the extent block if new_ext was @@ -322,7 +297,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, goto out; if (ext4_ext_insert_extent(handle, orig_inode, - orig_path, new_ext)) + orig_path, new_ext, 0)) goto out; } @@ -333,7 +308,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode, goto out; if (ext4_ext_insert_extent(handle, orig_inode, - orig_path, end_ext)) + orig_path, end_ext, 0)) goto out; } out: @@ -386,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start, /* Insert new entry */ if (new_ext->ee_len) { o_start[i] = *new_ext; - ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext)); + ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext)); } /* Insert end entry */ @@ -503,21 +478,20 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, struct ext4_extent *oext, *o_start, *o_end, *prev_ext; struct ext4_extent new_ext, start_ext, end_ext; ext4_lblk_t new_ext_end; - ext4_fsblk_t new_phys_end; int oext_alen, new_ext_alen, end_ext_alen; int depth = ext_depth(orig_inode); int ret; + start_ext.ee_block = end_ext.ee_block = 0; o_start = o_end = oext = orig_path[depth].p_ext; oext_alen = ext4_ext_get_actual_len(oext); start_ext.ee_len = end_ext.ee_len = 0; new_ext.ee_block = cpu_to_le32(*from); - ext4_ext_store_pblock(&new_ext, ext_pblock(dext)); + ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext)); new_ext.ee_len = dext->ee_len; new_ext_alen = ext4_ext_get_actual_len(&new_ext); new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1; - new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1; /* * Case: original extent is first @@ -530,6 +504,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, le32_to_cpu(oext->ee_block) + oext_alen) { start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) - le32_to_cpu(oext->ee_block)); + start_ext.ee_block = oext->ee_block; copy_extent_status(oext, &start_ext); } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) { prev_ext = oext - 1; @@ -543,6 +518,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, start_ext.ee_len = cpu_to_le16( ext4_ext_get_actual_len(prev_ext) + new_ext_alen); + start_ext.ee_block = oext->ee_block; copy_extent_status(prev_ext, &start_ext); new_ext.ee_len = 0; } @@ -554,7 +530,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, * new_ext |-------| */ if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) { - ext4_error(orig_inode->i_sb, __func__, + EXT4_ERROR_INODE(orig_inode, "new_ext_end(%u) should be less than or equal to " "oext->ee_block(%u) + oext_alen(%d) - 1", new_ext_end, le32_to_cpu(oext->ee_block), @@ -577,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode, copy_extent_status(oext, &end_ext); end_ext_alen = ext4_ext_get_actual_len(&end_ext); ext4_ext_store_pblock(&end_ext, - (ext_pblock(o_end) + oext_alen - end_ext_alen)); + (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen)); end_ext.ee_block = cpu_to_le32(le32_to_cpu(o_end->ee_block) + oext_alen - end_ext_alen); @@ -596,9 +572,11 @@ out: * @tmp_oext: the extent that will belong to the donor inode * @orig_off: block offset of original inode * @donor_off: block offset of donor inode - * @max_count: the maximun length of extents + * @max_count: the maximum length of extents + * + * Return 0 on success, or a negative error value on failure. */ -static void +static int mext_calc_swap_extents(struct ext4_extent *tmp_dext, struct ext4_extent *tmp_oext, ext4_lblk_t orig_off, ext4_lblk_t donor_off, @@ -607,13 +585,26 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, ext4_lblk_t diff, orig_diff; struct ext4_extent dext_old, oext_old; + BUG_ON(orig_off != donor_off); + + /* original and donor extents have to cover the same block offset */ + if (orig_off < le32_to_cpu(tmp_oext->ee_block) || + le32_to_cpu(tmp_oext->ee_block) + + ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off) + return -ENODATA; + + if (orig_off < le32_to_cpu(tmp_dext->ee_block) || + le32_to_cpu(tmp_dext->ee_block) + + ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off) + return -ENODATA; + dext_old = *tmp_dext; oext_old = *tmp_oext; /* When tmp_dext is too large, pick up the target range. */ diff = donor_off - le32_to_cpu(tmp_dext->ee_block); - ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff); + ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff); tmp_dext->ee_block = cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff); tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff); @@ -622,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, tmp_dext->ee_len = cpu_to_le16(max_count); orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block); - ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff); + ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff); /* Adjust extent length if donor extent is larger than orig */ if (ext4_ext_get_actual_len(tmp_dext) > @@ -634,6 +625,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, copy_extent_status(&oext_old, tmp_dext); copy_extent_status(&dext_old, tmp_oext); + + return 0; } /** @@ -644,6 +637,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * @donor_inode: donor inode * @from: block offset of orig_inode * @count: block count to be replaced + * @err: pointer to save return value * * Replace original inode extents and donor inode extents page by page. * We implement this replacement in the following three steps: @@ -654,33 +648,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext, * 3. Change the block information of donor inode to point at the saved * original inode blocks in the dummy extents. * - * Return 0 on success, or a negative error value on failure. + * Return replaced block count. */ static int mext_replace_branches(handle_t *handle, struct inode *orig_inode, struct inode *donor_inode, ext4_lblk_t from, - ext4_lblk_t count) + ext4_lblk_t count, int *err) { struct ext4_ext_path *orig_path = NULL; struct ext4_ext_path *donor_path = NULL; struct ext4_extent *oext, *dext; struct ext4_extent tmp_dext, tmp_oext; ext4_lblk_t orig_off = from, donor_off = from; - int err = 0; int depth; int replaced_count = 0; int dext_alen; - mext_double_down_write(orig_inode, donor_inode); + /* Protect extent trees against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); /* Get the original extent for the block "orig_off" */ - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; /* Get the donor extent for the head */ - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; @@ -690,37 +684,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, dext = donor_path[depth].p_ext; tmp_dext = *dext; - mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, donor_off, count); + if (*err) + goto out; /* Loop for the donor extents */ while (1) { /* The extent for donor must be found. */ if (!dext) { - ext4_error(donor_inode->i_sb, __func__, + EXT4_ERROR_INODE(donor_inode, "The extent for donor must be found"); - err = -EIO; + *err = -EIO; goto out; } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { - ext4_error(donor_inode->i_sb, __func__, + EXT4_ERROR_INODE(donor_inode, "Donor offset(%u) and the first block of donor " "extent(%u) should be equal", donor_off, le32_to_cpu(tmp_dext.ee_block)); - err = -EIO; + *err = -EIO; goto out; } /* Set donor extent to orig extent */ - err = mext_leaf_block(handle, orig_inode, + *err = mext_leaf_block(handle, orig_inode, orig_path, &tmp_dext, &orig_off); - if (err < 0) + if (*err) goto out; /* Set orig extent to donor extent */ - err = mext_leaf_block(handle, donor_inode, + *err = mext_leaf_block(handle, donor_inode, donor_path, &tmp_oext, &donor_off); - if (err < 0) + if (*err) goto out; dext_alen = ext4_ext_get_actual_len(&tmp_dext); @@ -734,35 +730,26 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode, if (orig_path) ext4_ext_drop_refs(orig_path); - err = get_ext_path(orig_inode, orig_off, &orig_path); - if (err) + *err = get_ext_path(orig_inode, orig_off, &orig_path); + if (*err) goto out; depth = ext_depth(orig_inode); oext = orig_path[depth].p_ext; - if (le32_to_cpu(oext->ee_block) + - ext4_ext_get_actual_len(oext) <= orig_off) { - err = 0; - goto out; - } tmp_oext = *oext; if (donor_path) ext4_ext_drop_refs(donor_path); - err = get_ext_path(donor_inode, donor_off, &donor_path); - if (err) + *err = get_ext_path(donor_inode, donor_off, &donor_path); + if (*err) goto out; depth = ext_depth(donor_inode); dext = donor_path[depth].p_ext; - if (le32_to_cpu(dext->ee_block) + - ext4_ext_get_actual_len(dext) <= donor_off) { - err = 0; - goto out; - } tmp_dext = *dext; - mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, - donor_off, - count - replaced_count); + *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, + donor_off, count - replaced_count); + if (*err) + goto out; } out: @@ -775,8 +762,12 @@ out: kfree(donor_path); } - mext_double_up_write(orig_inode, donor_inode); - return err; + ext4_ext_invalidate_cache(orig_inode); + ext4_ext_invalidate_cache(donor_inode); + + double_up_write_data_sem(orig_inode, donor_inode); + + return replaced_count; } /** @@ -788,16 +779,17 @@ out: * @data_offset_in_page: block index where data swapping starts * @block_len_in_page: the number of blocks to be swapped * @uninit: orig extent is uninitialized or not + * @err: pointer to save return value * * Save the data in original inode blocks and replace original inode extents * with donor inode extents by calling mext_replace_branches(). - * Finally, write out the saved data in new original inode blocks. Return 0 - * on success, or a negative error value on failure. + * Finally, write out the saved data in new original inode blocks. Return + * replaced block count. */ static int move_extent_per_page(struct file *o_filp, struct inode *donor_inode, pgoff_t orig_page_offset, int data_offset_in_page, - int block_len_in_page, int uninit) + int block_len_in_page, int uninit, int *err) { struct inode *orig_inode = o_filp->f_dentry->d_inode; struct address_space *mapping = orig_inode->i_mapping; @@ -809,9 +801,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, long long offs = orig_page_offset << PAGE_CACHE_SHIFT; unsigned long blocksize = orig_inode->i_sb->s_blocksize; unsigned int w_flags = 0; - unsigned int tmp_data_len, data_len; + unsigned int tmp_data_size, data_size, replaced_size; void *fsdata; - int ret, i, jblocks; + int i, jblocks; + int err2 = 0; + int replaced_count = 0; int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; /* @@ -821,8 +815,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; handle = ext4_journal_start(orig_inode, jblocks); if (IS_ERR(handle)) { - ret = PTR_ERR(handle); - return ret; + *err = PTR_ERR(handle); + return 0; } if (segment_eq(get_fs(), KERNEL_DS)) @@ -838,39 +832,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, * Just swap data blocks between orig and donor. */ if (uninit) { - ret = mext_replace_branches(handle, orig_inode, - donor_inode, orig_blk_offset, - block_len_in_page); - - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); + replaced_count = mext_replace_branches(handle, orig_inode, + donor_inode, orig_blk_offset, + block_len_in_page, err); goto out2; } offs = (long long)orig_blk_offset << orig_inode->i_blkbits; - /* Calculate data_len */ + /* Calculate data_size */ if ((orig_blk_offset + block_len_in_page - 1) == ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { /* Replace the last block */ - tmp_data_len = orig_inode->i_size & (blocksize - 1); + tmp_data_size = orig_inode->i_size & (blocksize - 1); /* - * If data_len equal zero, it shows data_len is multiples of + * If data_size equal zero, it shows data_size is multiples of * blocksize. So we set appropriate value. */ - if (tmp_data_len == 0) - tmp_data_len = blocksize; + if (tmp_data_size == 0) + tmp_data_size = blocksize; - data_len = tmp_data_len + + data_size = tmp_data_size + ((block_len_in_page - 1) << orig_inode->i_blkbits); - } else { - data_len = block_len_in_page << orig_inode->i_blkbits; - } + } else + data_size = block_len_in_page << orig_inode->i_blkbits; + + replaced_size = data_size; - ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, + *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags, &page, &fsdata); - if (unlikely(ret < 0)) + if (unlikely(*err < 0)) goto out; if (!PageUptodate(page)) { @@ -891,14 +882,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, /* Release old bh and drop refs */ try_to_release_page(page, 0); - ret = mext_replace_branches(handle, orig_inode, donor_inode, - orig_blk_offset, block_len_in_page); - if (ret < 0) - goto out; - - /* Clear the inode cache not to refer to the old data */ - ext4_ext_invalidate_cache(orig_inode); - ext4_ext_invalidate_cache(donor_inode); + replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, + orig_blk_offset, block_len_in_page, + &err2); + if (err2) { + if (replaced_count) { + block_len_in_page = replaced_count; + replaced_size = + block_len_in_page << orig_inode->i_blkbits; + } else + goto out; + } if (!page_has_buffers(page)) create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); @@ -908,16 +902,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode, bh = bh->b_this_page; for (i = 0; i < block_len_in_page; i++) { - ret = ext4_get_block(orig_inode, + *err = ext4_get_block(orig_inode, (sector_t)(orig_blk_offset + i), bh, 0); - if (ret < 0) + if (*err < 0) goto out; if (bh->b_this_page != NULL) bh = bh->b_this_page; } - ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, + *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size, page, fsdata); page = NULL; @@ -931,18 +925,20 @@ out: out2: ext4_journal_stop(handle); - return ret < 0 ? ret : 0; + if (err2) + *err = err2; + + return replaced_count; } /** - * mext_check_argumants - Check whether move extent can be done + * mext_check_arguments - Check whether move extent can be done * * @orig_inode: original inode * @donor_inode: donor inode * @orig_start: logical start offset in block for orig * @donor_start: logical start offset in block for donor * @len: the number of blocks to be moved - * @moved_len: moved block length * * Check the arguments of ext4_move_extents() whether the files can be * exchanged with each other. @@ -950,21 +946,23 @@ out2: */ static int mext_check_arguments(struct inode *orig_inode, - struct inode *donor_inode, __u64 orig_start, - __u64 donor_start, __u64 *len, __u64 moved_len) + struct inode *donor_inode, __u64 orig_start, + __u64 donor_start, __u64 *len) { ext4_lblk_t orig_blocks, donor_blocks; unsigned int blkbits = orig_inode->i_blkbits; unsigned int blocksize = 1 << blkbits; - /* Regular file check */ - if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { - ext4_debug("ext4 move extent: The argument files should be " - "regular file [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); + if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { + ext4_debug("ext4 move extent: suid or sgid is set" + " to donor file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } + if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode)) + return -EPERM; + /* Ext4 move extent does not support swapfile */ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { ext4_debug("ext4 move extent: The argument files should " @@ -981,20 +979,12 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - /* orig and donor should be different file */ - if (orig_inode->i_ino == donor_inode->i_ino) { - ext4_debug("ext4 move extent: The argument files should not " - "be same file [ino:orig %lu, donor %lu]\n", - orig_inode->i_ino, donor_inode->i_ino); - return -EINVAL; - } - /* Ext4 move extent supports only extent based file */ - if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) { + if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: orig file is not extents " "based file [ino:orig %lu]\n", orig_inode->i_ino); return -EOPNOTSUPP; - } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) { + } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) { ext4_debug("ext4 move extent: donor file is not extents " "based file [ino:donor %lu]\n", donor_inode->i_ino); return -EOPNOTSUPP; @@ -1013,19 +1003,12 @@ mext_check_arguments(struct inode *orig_inode, return -EINVAL; } - if (moved_len) { - ext4_debug("ext4 move extent: moved_len should be 0 " - "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, - donor_inode->i_ino); - return -EINVAL; - } - - if ((orig_start > MAX_DEFRAG_SIZE) || - (donor_start > MAX_DEFRAG_SIZE) || - (*len > MAX_DEFRAG_SIZE) || - (orig_start + *len > MAX_DEFRAG_SIZE)) { - ext4_debug("ext4 move extent: Can't handle over [%lu] blocks " - "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE, + if ((orig_start >= EXT_MAX_BLOCKS) || + (donor_start >= EXT_MAX_BLOCKS) || + (*len > EXT_MAX_BLOCKS) || + (orig_start + *len >= EXT_MAX_BLOCKS)) { + ext4_debug("ext4 move extent: Can't handle over [%u] blocks " + "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; } @@ -1076,7 +1059,7 @@ mext_check_arguments(struct inode *orig_inode, } if (!*len) { - ext4_debug("ext4 move extent: len shoudld not be 0 " + ext4_debug("ext4 move extent: len should not be 0 " "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, donor_inode->i_ino); return -EINVAL; @@ -1101,7 +1084,7 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2) BUG_ON(inode1 == NULL && inode2 == NULL); - ret = mext_check_null_inode(inode1, inode2, __func__); + ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); if (ret < 0) goto out; @@ -1138,7 +1121,7 @@ mext_inode_double_unlock(struct inode *inode1, struct inode *inode2) BUG_ON(inode1 == NULL && inode2 == NULL); - ret = mext_check_null_inode(inode1, inode2, __func__); + ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__); if (ret < 0) goto out; @@ -1212,16 +1195,32 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, int block_len_in_page; int uninit; - /* protect orig and donor against a truncate */ + /* orig and donor should be different file */ + if (orig_inode->i_ino == donor_inode->i_ino) { + ext4_debug("ext4 move extent: The argument files should not " + "be same file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* Regular file check */ + if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) { + ext4_debug("ext4 move extent: The argument files should be " + "regular file [ino:orig %lu, donor %lu]\n", + orig_inode->i_ino, donor_inode->i_ino); + return -EINVAL; + } + + /* Protect orig and donor inodes against a truncate */ ret1 = mext_inode_double_lock(orig_inode, donor_inode); if (ret1 < 0) return ret1; - mext_double_down_read(orig_inode, donor_inode); + /* Protect extent tree against block allocations via delalloc */ + double_down_write_data_sem(orig_inode, donor_inode); /* Check the filesystem environment whether move_extent can be done */ ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, - donor_start, &len, *moved_len); - mext_double_up_read(orig_inode, donor_inode); + donor_start, &len); if (ret1) goto out; @@ -1243,11 +1242,15 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ext_cur = holecheck_path[depth].p_ext; /* - * Get proper extent whose ee_block is beyond block_start - * if block_start was within the hole. + * Get proper starting location of block replacement if block_start was + * within the hole. */ if (le32_to_cpu(ext_cur->ee_block) + ext4_ext_get_actual_len(ext_cur) - 1 < block_start) { + /* + * The hole exists between extents or the tail of + * original file. + */ last_extent = mext_next_extent(orig_inode, holecheck_path, &ext_cur); if (last_extent < 0) { @@ -1260,8 +1263,12 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, ret1 = last_extent; goto out; } - } - seq_start = block_start; + seq_start = le32_to_cpu(ext_cur->ee_block); + } else if (le32_to_cpu(ext_cur->ee_block) > block_start) + /* The hole exists at the beginning of original file. */ + seq_start = le32_to_cpu(ext_cur->ee_block); + else + seq_start = block_start; /* No blocks within the specified range. */ if (le32_to_cpu(ext_cur->ee_block) > block_end) { @@ -1327,36 +1334,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, seq_start = le32_to_cpu(ext_cur->ee_block); rest_blocks = seq_blocks; - /* Discard preallocations of two inodes */ - down_write(&EXT4_I(orig_inode)->i_data_sem); - ext4_discard_preallocations(orig_inode); - up_write(&EXT4_I(orig_inode)->i_data_sem); - - down_write(&EXT4_I(donor_inode)->i_data_sem); - ext4_discard_preallocations(donor_inode); - up_write(&EXT4_I(donor_inode)->i_data_sem); + /* + * Up semaphore to avoid following problems: + * a. transaction deadlock among ext4_journal_start, + * ->write_begin via pagefault, and jbd2_journal_commit + * b. racing with ->readpage, ->write_begin, and ext4_get_block + * in move_extent_per_page + */ + double_up_write_data_sem(orig_inode, donor_inode); while (orig_page_offset <= seq_end_page) { /* Swap original branches with new branches */ - ret1 = move_extent_per_page(o_filp, donor_inode, + block_len_in_page = move_extent_per_page( + o_filp, donor_inode, orig_page_offset, data_offset_in_page, - block_len_in_page, uninit); - if (ret1 < 0) - goto out; - orig_page_offset++; + block_len_in_page, uninit, + &ret1); + /* Count how many blocks we have exchanged */ *moved_len += block_len_in_page; + if (ret1 < 0) + break; if (*moved_len > len) { - ext4_error(orig_inode->i_sb, __func__, + EXT4_ERROR_INODE(orig_inode, "We replaced blocks too much! " "sum of replaced: %llu requested: %llu", *moved_len, len); ret1 = -EIO; - goto out; + break; } + orig_page_offset++; data_offset_in_page = 0; rest_blocks -= block_len_in_page; if (rest_blocks > blocks_per_page) @@ -1365,6 +1375,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, block_len_in_page = rest_blocks; } + double_down_write_data_sem(orig_inode, donor_inode); + if (ret1 < 0) + break; + /* Decrease buffer counter */ if (holecheck_path) ext4_ext_drop_refs(holecheck_path); @@ -1386,6 +1400,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, } out: + if (*moved_len) { + ext4_discard_preallocations(orig_inode); + ext4_discard_preallocations(donor_inode); + } + if (orig_path) { ext4_ext_drop_refs(orig_path); kfree(orig_path); @@ -1394,7 +1413,7 @@ out: ext4_ext_drop_refs(holecheck_path); kfree(holecheck_path); } - + double_up_write_data_sem(orig_inode, donor_inode); ret2 = mext_inode_double_unlock(orig_inode, donor_inode); if (ret1)