ext4: Fix max file size and logical block counting of extent format file, CVE-2011...
[linux-flexiantxendom0-natty.git] / fs / ext4 / move_extent.c
index e4bd876..a4f58e9 100644 (file)
@@ -15,6 +15,7 @@
 
 #include <linux/fs.h>
 #include <linux/quotaops.h>
+#include <linux/slab.h>
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 #include "ext4.h"
@@ -39,7 +40,9 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
        if (IS_ERR(*path)) {
                ret = PTR_ERR(*path);
                *path = NULL;
-       }
+       } else if ((*path)[ext_depth(inode)].p_ext == NULL)
+               ret = -ENODATA;
+
        return ret;
 }
 
@@ -75,12 +78,14 @@ static int
 mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                      struct ext4_extent **extent)
 {
+       struct ext4_extent_header *eh;
        int ppos, leaf_ppos = path->p_depth;
 
        ppos = leaf_ppos;
        if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
                /* leaf block */
                *extent = ++path[ppos].p_ext;
+               path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
                return 0;
        }
 
@@ -91,7 +96,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 
                        /* index block */
                        path[ppos].p_idx++;
-                       path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+                       path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
                        if (path[ppos+1].p_bh)
                                brelse(path[ppos+1].p_bh);
                        path[ppos+1].p_bh =
@@ -106,7 +111,7 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                                path[cur_ppos].p_idx =
                                        EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
                                path[cur_ppos].p_block =
-                                       idx_pblock(path[cur_ppos].p_idx);
+                                       ext4_idx_pblock(path[cur_ppos].p_idx);
                                if (path[cur_ppos+1].p_bh)
                                        brelse(path[cur_ppos+1].p_bh);
                                path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
@@ -117,9 +122,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
                                        ext_block_hdr(path[cur_ppos+1].p_bh);
                        }
 
+                       path[leaf_ppos].p_ext = *extent = NULL;
+
+                       eh = path[leaf_ppos].p_hdr;
+                       if (le16_to_cpu(eh->eh_entries) == 0)
+                               /* empty leaf is found */
+                               return -ENODATA;
+
                        /* leaf block */
                        path[leaf_ppos].p_ext = *extent =
                                EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
+                       path[leaf_ppos].p_block =
+                                       ext4_ext_pblock(path[leaf_ppos].p_ext);
                        return 0;
                }
        }
@@ -128,47 +142,43 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
 }
 
 /**
- * mext_double_down_read - Acquire two inodes' read semaphore
+ * mext_check_null_inode - NULL check for two inodes
  *
- * @orig_inode:                original inode structure
- * @donor_inode:       donor inode structure
- * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
+ * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
  */
-static void
-mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
+static int
+mext_check_null_inode(struct inode *inode1, struct inode *inode2,
+                     const char *function, unsigned int line)
 {
-       struct inode *first = orig_inode, *second = donor_inode;
-
-       BUG_ON(orig_inode == NULL || donor_inode == NULL);
+       int ret = 0;
 
-       /*
-        * Use the inode number to provide the stable locking order instead
-        * of its address, because the C language doesn't guarantee you can
-        * compare pointers that don't come from the same array.
-        */
-       if (donor_inode->i_ino < orig_inode->i_ino) {
-               first = donor_inode;
-               second = orig_inode;
+       if (inode1 == NULL) {
+               __ext4_error(inode2->i_sb, function, line,
+                       "Both inodes should not be NULL: "
+                       "inode1 NULL inode2 %lu", inode2->i_ino);
+               ret = -EIO;
+       } else if (inode2 == NULL) {
+               __ext4_error(inode1->i_sb, function, line,
+                       "Both inodes should not be NULL: "
+                       "inode1 %lu inode2 NULL", inode1->i_ino);
+               ret = -EIO;
        }
-
-       down_read(&EXT4_I(first)->i_data_sem);
-       down_read(&EXT4_I(second)->i_data_sem);
+       return ret;
 }
 
 /**
- * mext_double_down_write - Acquire two inodes' write semaphore
+ * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
  *
  * @orig_inode:                original inode structure
  * @donor_inode:       donor inode structure
- * Acquire write semaphore of the two inodes (orig and donor) by i_ino order.
+ * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
+ * i_ino order.
  */
 static void
-mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
+double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 {
        struct inode *first = orig_inode, *second = donor_inode;
 
-       BUG_ON(orig_inode == NULL || donor_inode == NULL);
-
        /*
         * Use the inode number to provide the stable locking order instead
         * of its address, because the C language doesn't guarantee you can
@@ -180,37 +190,19 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
        }
 
        down_write(&EXT4_I(first)->i_data_sem);
-       down_write(&EXT4_I(second)->i_data_sem);
-}
-
-/**
- * mext_double_up_read - Release two inodes' read semaphore
- *
- * @orig_inode:                original inode structure to be released its lock first
- * @donor_inode:       donor inode structure to be released its lock second
- * Release read semaphore of two inodes (orig and donor).
- */
-static void
-mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode)
-{
-       BUG_ON(orig_inode == NULL || donor_inode == NULL);
-
-       up_read(&EXT4_I(orig_inode)->i_data_sem);
-       up_read(&EXT4_I(donor_inode)->i_data_sem);
+       down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
 }
 
 /**
- * mext_double_up_write - Release two inodes' write semaphore
+ * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
  *
  * @orig_inode:                original inode structure to be released its lock first
  * @donor_inode:       donor inode structure to be released its lock second
- * Release write semaphore of two inodes (orig and donor).
+ * Release write lock of i_data_sem of two inodes (orig and donor).
  */
 static void
-mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
+double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
 {
-       BUG_ON(orig_inode == NULL || donor_inode == NULL);
-
        up_write(&EXT4_I(orig_inode)->i_data_sem);
        up_write(&EXT4_I(donor_inode)->i_data_sem);
 }
@@ -257,10 +249,11 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
                         */
                        o_end->ee_block = end_ext->ee_block;
                        o_end->ee_len = end_ext->ee_len;
-                       ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+                       ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
                }
 
                o_start->ee_len = start_ext->ee_len;
+               eblock = le32_to_cpu(start_ext->ee_block);
                new_flag = 1;
 
        } else if (start_ext->ee_len && new_ext->ee_len &&
@@ -271,6 +264,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
                 * orig  |------------------------------|
                 */
                o_start->ee_len = start_ext->ee_len;
+               eblock = le32_to_cpu(start_ext->ee_block);
                new_flag = 1;
 
        } else if (!start_ext->ee_len && new_ext->ee_len &&
@@ -282,7 +276,7 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
                 */
                o_end->ee_block = end_ext->ee_block;
                o_end->ee_len = end_ext->ee_len;
-               ext4_ext_store_pblock(o_end, ext_pblock(end_ext));
+               ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
 
                /*
                 * Set 0 to the extent block if new_ext was
@@ -299,22 +293,22 @@ mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
 
        if (new_flag) {
                err = get_ext_path(orig_inode, eblock, &orig_path);
-               if (orig_path == NULL)
+               if (err)
                        goto out;
 
                if (ext4_ext_insert_extent(handle, orig_inode,
-                                       orig_path, new_ext))
+                                       orig_path, new_ext, 0))
                        goto out;
        }
 
        if (end_flag) {
                err = get_ext_path(orig_inode,
                                le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
-               if (orig_path == NULL)
+               if (err)
                        goto out;
 
                if (ext4_ext_insert_extent(handle, orig_inode,
-                                          orig_path, end_ext))
+                                          orig_path, end_ext, 0))
                        goto out;
        }
 out:
@@ -367,7 +361,7 @@ mext_insert_inside_block(struct ext4_extent *o_start,
        /* Insert new entry */
        if (new_ext->ee_len) {
                o_start[i] = *new_ext;
-               ext4_ext_store_pblock(&o_start[i++], ext_pblock(new_ext));
+               ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
        }
 
        /* Insert end entry */
@@ -484,21 +478,20 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
        struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
        struct ext4_extent new_ext, start_ext, end_ext;
        ext4_lblk_t new_ext_end;
-       ext4_fsblk_t new_phys_end;
        int oext_alen, new_ext_alen, end_ext_alen;
        int depth = ext_depth(orig_inode);
        int ret;
 
+       start_ext.ee_block = end_ext.ee_block = 0;
        o_start = o_end = oext = orig_path[depth].p_ext;
        oext_alen = ext4_ext_get_actual_len(oext);
        start_ext.ee_len = end_ext.ee_len = 0;
 
        new_ext.ee_block = cpu_to_le32(*from);
-       ext4_ext_store_pblock(&new_ext, ext_pblock(dext));
+       ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
        new_ext.ee_len = dext->ee_len;
        new_ext_alen = ext4_ext_get_actual_len(&new_ext);
        new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
-       new_phys_end = ext_pblock(&new_ext) + new_ext_alen - 1;
 
        /*
         * Case: original extent is first
@@ -511,6 +504,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
                le32_to_cpu(oext->ee_block) + oext_alen) {
                start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
                                               le32_to_cpu(oext->ee_block));
+               start_ext.ee_block = oext->ee_block;
                copy_extent_status(oext, &start_ext);
        } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
                prev_ext = oext - 1;
@@ -524,6 +518,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
                        start_ext.ee_len = cpu_to_le16(
                                ext4_ext_get_actual_len(prev_ext) +
                                new_ext_alen);
+                       start_ext.ee_block = oext->ee_block;
                        copy_extent_status(prev_ext, &start_ext);
                        new_ext.ee_len = 0;
                }
@@ -534,7 +529,15 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
         * oext      |-----------|
         * new_ext       |-------|
         */
-       BUG_ON(le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end);
+       if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
+               EXT4_ERROR_INODE(orig_inode,
+                       "new_ext_end(%u) should be less than or equal to "
+                       "oext->ee_block(%u) + oext_alen(%d) - 1",
+                       new_ext_end, le32_to_cpu(oext->ee_block),
+                       oext_alen);
+               ret = -EIO;
+               goto out;
+       }
 
        /*
         * Case: new_ext is smaller than original extent
@@ -550,7 +553,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
                copy_extent_status(oext, &end_ext);
                end_ext_alen = ext4_ext_get_actual_len(&end_ext);
                ext4_ext_store_pblock(&end_ext,
-                       (ext_pblock(o_end) + oext_alen - end_ext_alen));
+                       (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
                end_ext.ee_block =
                        cpu_to_le32(le32_to_cpu(o_end->ee_block) +
                        oext_alen - end_ext_alen);
@@ -558,6 +561,7 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
 
        ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
                                o_end, &start_ext, &new_ext, &end_ext);
+out:
        return ret;
 }
 
@@ -568,9 +572,11 @@ mext_leaf_block(handle_t *handle, struct inode *orig_inode,
  * @tmp_oext:          the extent that will belong to the donor inode
  * @orig_off:          block offset of original inode
  * @donor_off:         block offset of donor inode
- * @max_count:         the maximun length of extents
+ * @max_count:         the maximum length of extents
+ *
+ * Return 0 on success, or a negative error value on failure.
  */
-static void
+static int
 mext_calc_swap_extents(struct ext4_extent *tmp_dext,
                              struct ext4_extent *tmp_oext,
                              ext4_lblk_t orig_off, ext4_lblk_t donor_off,
@@ -579,13 +585,26 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
        ext4_lblk_t diff, orig_diff;
        struct ext4_extent dext_old, oext_old;
 
+       BUG_ON(orig_off != donor_off);
+
+       /* original and donor extents have to cover the same block offset */
+       if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
+           le32_to_cpu(tmp_oext->ee_block) +
+                       ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
+               return -ENODATA;
+
+       if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
+           le32_to_cpu(tmp_dext->ee_block) +
+                       ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
+               return -ENODATA;
+
        dext_old = *tmp_dext;
        oext_old = *tmp_oext;
 
        /* When tmp_dext is too large, pick up the target range. */
        diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
 
-       ext4_ext_store_pblock(tmp_dext, ext_pblock(tmp_dext) + diff);
+       ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
        tmp_dext->ee_block =
                        cpu_to_le32(le32_to_cpu(tmp_dext->ee_block) + diff);
        tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_dext->ee_len) - diff);
@@ -594,7 +613,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
                tmp_dext->ee_len = cpu_to_le16(max_count);
 
        orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
-       ext4_ext_store_pblock(tmp_oext, ext_pblock(tmp_oext) + orig_diff);
+       ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
 
        /* Adjust extent length if donor extent is larger than orig */
        if (ext4_ext_get_actual_len(tmp_dext) >
@@ -606,6 +625,8 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
 
        copy_extent_status(&oext_old, tmp_dext);
        copy_extent_status(&dext_old, tmp_oext);
+
+       return 0;
 }
 
 /**
@@ -616,6 +637,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
  * @donor_inode:       donor inode
  * @from:              block offset of orig_inode
  * @count:             block count to be replaced
+ * @err:               pointer to store the error status
  *
  * Replace original inode extents and donor inode extents page by page.
  * We implement this replacement in the following three steps:
@@ -626,33 +648,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
  * 3. Change the block information of donor inode to point at the saved
  *    original inode blocks in the dummy extents.
  *
- * Return 0 on success, or a negative error value on failure.
+ * Return replaced block count.
  */
 static int
 mext_replace_branches(handle_t *handle, struct inode *orig_inode,
                           struct inode *donor_inode, ext4_lblk_t from,
-                          ext4_lblk_t count)
+                          ext4_lblk_t count, int *err)
 {
        struct ext4_ext_path *orig_path = NULL;
        struct ext4_ext_path *donor_path = NULL;
        struct ext4_extent *oext, *dext;
        struct ext4_extent tmp_dext, tmp_oext;
        ext4_lblk_t orig_off = from, donor_off = from;
-       int err = 0;
        int depth;
        int replaced_count = 0;
        int dext_alen;
 
-       mext_double_down_write(orig_inode, donor_inode);
+       /* Protect extent trees against block allocations via delalloc */
+       double_down_write_data_sem(orig_inode, donor_inode);
 
        /* Get the original extent for the block "orig_off" */
-       err = get_ext_path(orig_inode, orig_off, &orig_path);
-       if (orig_path == NULL)
+       *err = get_ext_path(orig_inode, orig_off, &orig_path);
+       if (*err)
                goto out;
 
        /* Get the donor extent for the head */
-       err = get_ext_path(donor_inode, donor_off, &donor_path);
-       if (donor_path == NULL)
+       *err = get_ext_path(donor_inode, donor_off, &donor_path);
+       if (*err)
                goto out;
        depth = ext_depth(orig_inode);
        oext = orig_path[depth].p_ext;
@@ -662,24 +684,39 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
        dext = donor_path[depth].p_ext;
        tmp_dext = *dext;
 
-       mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+       *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
                                      donor_off, count);
+       if (*err)
+               goto out;
 
        /* Loop for the donor extents */
        while (1) {
                /* The extent for donor must be found. */
-               BUG_ON(!dext || donor_off != le32_to_cpu(tmp_dext.ee_block));
+               if (!dext) {
+                       EXT4_ERROR_INODE(donor_inode,
+                                  "The extent for donor must be found");
+                       *err = -EIO;
+                       goto out;
+               } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
+                       EXT4_ERROR_INODE(donor_inode,
+                               "Donor offset(%u) and the first block of donor "
+                               "extent(%u) should be equal",
+                               donor_off,
+                               le32_to_cpu(tmp_dext.ee_block));
+                       *err = -EIO;
+                       goto out;
+               }
 
                /* Set donor extent to orig extent */
-               err = mext_leaf_block(handle, orig_inode,
+               *err = mext_leaf_block(handle, orig_inode,
                                           orig_path, &tmp_dext, &orig_off);
-               if (err < 0)
+               if (*err)
                        goto out;
 
                /* Set orig extent to donor extent */
-               err = mext_leaf_block(handle, donor_inode,
+               *err = mext_leaf_block(handle, donor_inode,
                                           donor_path, &tmp_oext, &donor_off);
-               if (err < 0)
+               if (*err)
                        goto out;
 
                dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -693,35 +730,26 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
 
                if (orig_path)
                        ext4_ext_drop_refs(orig_path);
-               err = get_ext_path(orig_inode, orig_off, &orig_path);
-               if (orig_path == NULL)
+               *err = get_ext_path(orig_inode, orig_off, &orig_path);
+               if (*err)
                        goto out;
                depth = ext_depth(orig_inode);
                oext = orig_path[depth].p_ext;
-               if (le32_to_cpu(oext->ee_block) +
-                               ext4_ext_get_actual_len(oext) <= orig_off) {
-                       err = 0;
-                       goto out;
-               }
                tmp_oext = *oext;
 
                if (donor_path)
                        ext4_ext_drop_refs(donor_path);
-               err = get_ext_path(donor_inode, donor_off, &donor_path);
-               if (donor_path == NULL)
+               *err = get_ext_path(donor_inode, donor_off, &donor_path);
+               if (*err)
                        goto out;
                depth = ext_depth(donor_inode);
                dext = donor_path[depth].p_ext;
-               if (le32_to_cpu(dext->ee_block) +
-                               ext4_ext_get_actual_len(dext) <= donor_off) {
-                       err = 0;
-                       goto out;
-               }
                tmp_dext = *dext;
 
-               mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
-                                             donor_off,
-                                             count - replaced_count);
+               *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
+                                          donor_off, count - replaced_count);
+               if (*err)
+                       goto out;
        }
 
 out:
@@ -734,8 +762,12 @@ out:
                kfree(donor_path);
        }
 
-       mext_double_up_write(orig_inode, donor_inode);
-       return err;
+       ext4_ext_invalidate_cache(orig_inode);
+       ext4_ext_invalidate_cache(donor_inode);
+
+       double_up_write_data_sem(orig_inode, donor_inode);
+
+       return replaced_count;
 }
 
 /**
@@ -747,16 +779,17 @@ out:
  * @data_offset_in_page:       block index where data swapping starts
  * @block_len_in_page:         the number of blocks to be swapped
  * @uninit:                    orig extent is uninitialized or not
+ * @err:                       pointer to store the error status
  *
  * Save the data in original inode blocks and replace original inode extents
  * with donor inode extents by calling mext_replace_branches().
- * Finally, write out the saved data in new original inode blocks. Return 0
- * on success, or a negative error value on failure.
+ * Finally, write out the saved data in new original inode blocks. Return
+ * replaced block count.
  */
 static int
 move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
                  pgoff_t orig_page_offset, int data_offset_in_page,
-                 int block_len_in_page, int uninit)
+                 int block_len_in_page, int uninit, int *err)
 {
        struct inode *orig_inode = o_filp->f_dentry->d_inode;
        struct address_space *mapping = orig_inode->i_mapping;
@@ -768,9 +801,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
        unsigned long blocksize = orig_inode->i_sb->s_blocksize;
        unsigned int w_flags = 0;
-       unsigned int tmp_data_len, data_len;
+       unsigned int tmp_data_size, data_size, replaced_size;
        void *fsdata;
-       int ret, i, jblocks;
+       int i, jblocks;
+       int err2 = 0;
+       int replaced_count = 0;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
 
        /*
@@ -780,8 +815,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
        handle = ext4_journal_start(orig_inode, jblocks);
        if (IS_ERR(handle)) {
-               ret = PTR_ERR(handle);
-               return ret;
+               *err = PTR_ERR(handle);
+               return 0;
        }
 
        if (segment_eq(get_fs(), KERNEL_DS))
@@ -797,39 +832,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
         * Just swap data blocks between orig and donor.
         */
        if (uninit) {
-               ret = mext_replace_branches(handle, orig_inode,
-                                                donor_inode, orig_blk_offset,
-                                                block_len_in_page);
-
-               /* Clear the inode cache not to refer to the old data */
-               ext4_ext_invalidate_cache(orig_inode);
-               ext4_ext_invalidate_cache(donor_inode);
+               replaced_count = mext_replace_branches(handle, orig_inode,
+                                               donor_inode, orig_blk_offset,
+                                               block_len_in_page, err);
                goto out2;
        }
 
        offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
 
-       /* Calculate data_len */
+       /* Calculate data_size */
        if ((orig_blk_offset + block_len_in_page - 1) ==
            ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
                /* Replace the last block */
-               tmp_data_len = orig_inode->i_size & (blocksize - 1);
+               tmp_data_size = orig_inode->i_size & (blocksize - 1);
                /*
-                * If data_len equal zero, it shows data_len is multiples of
+                * If tmp_data_size is zero, i_size is a multiple of
                 * blocksize. So we set appropriate value.
                 */
-               if (tmp_data_len == 0)
-                       tmp_data_len = blocksize;
+               if (tmp_data_size == 0)
+                       tmp_data_size = blocksize;
 
-               data_len = tmp_data_len +
+               data_size = tmp_data_size +
                        ((block_len_in_page - 1) << orig_inode->i_blkbits);
-       } else {
-               data_len = block_len_in_page << orig_inode->i_blkbits;
-       }
+       } else
+               data_size = block_len_in_page << orig_inode->i_blkbits;
 
-       ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags,
+       replaced_size = data_size;
+
+       *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
                                 &page, &fsdata);
-       if (unlikely(ret < 0))
+       if (unlikely(*err < 0))
                goto out;
 
        if (!PageUptodate(page)) {
@@ -850,14 +882,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
        /* Release old bh and drop refs */
        try_to_release_page(page, 0);
 
-       ret = mext_replace_branches(handle, orig_inode, donor_inode,
-                                        orig_blk_offset, block_len_in_page);
-       if (ret < 0)
-               goto out;
-
-       /* Clear the inode cache not to refer to the old data */
-       ext4_ext_invalidate_cache(orig_inode);
-       ext4_ext_invalidate_cache(donor_inode);
+       replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
+                                       orig_blk_offset, block_len_in_page,
+                                       &err2);
+       if (err2) {
+               if (replaced_count) {
+                       block_len_in_page = replaced_count;
+                       replaced_size =
+                               block_len_in_page << orig_inode->i_blkbits;
+               } else
+                       goto out;
+       }
 
        if (!page_has_buffers(page))
                create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -867,16 +902,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
                bh = bh->b_this_page;
 
        for (i = 0; i < block_len_in_page; i++) {
-               ret = ext4_get_block(orig_inode,
+               *err = ext4_get_block(orig_inode,
                                (sector_t)(orig_blk_offset + i), bh, 0);
-               if (ret < 0)
+               if (*err < 0)
                        goto out;
 
                if (bh->b_this_page != NULL)
                        bh = bh->b_this_page;
        }
 
-       ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len,
+       *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
                               page, fsdata);
        page = NULL;
 
@@ -890,18 +925,20 @@ out:
 out2:
        ext4_journal_stop(handle);
 
-       return ret < 0 ? ret : 0;
+       if (err2)
+               *err = err2;
+
+       return replaced_count;
 }
 
 /**
- * mext_check_argumants - Check whether move extent can be done
+ * mext_check_arguments - Check whether move extent can be done
  *
  * @orig_inode:                original inode
  * @donor_inode:       donor inode
  * @orig_start:                logical start offset in block for orig
  * @donor_start:       logical start offset in block for donor
  * @len:               the number of blocks to be moved
- * @moved_len:         moved block length
  *
  * Check the arguments of ext4_move_extents() whether the files can be
  * exchanged with each other.
@@ -909,21 +946,23 @@ out2:
  */
 static int
 mext_check_arguments(struct inode *orig_inode,
-                         struct inode *donor_inode, __u64 orig_start,
-                         __u64 donor_start, __u64 *len, __u64 moved_len)
+                    struct inode *donor_inode, __u64 orig_start,
+                    __u64 donor_start, __u64 *len)
 {
        ext4_lblk_t orig_blocks, donor_blocks;
        unsigned int blkbits = orig_inode->i_blkbits;
        unsigned int blocksize = 1 << blkbits;
 
-       /* Regular file check */
-       if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
-               ext4_debug("ext4 move extent: The argument files should be "
-                       "regular file [ino:orig %lu, donor %lu]\n",
-                       orig_inode->i_ino, donor_inode->i_ino);
+       if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
+               ext4_debug("ext4 move extent: suid or sgid is set"
+                          " to donor file [ino:orig %lu, donor %lu]\n",
+                          orig_inode->i_ino, donor_inode->i_ino);
                return -EINVAL;
        }
 
+       if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
+               return -EPERM;
+
        /* Ext4 move extent does not support swapfile */
        if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
                ext4_debug("ext4 move extent: The argument files should "
@@ -940,20 +979,12 @@ mext_check_arguments(struct inode *orig_inode,
                return -EINVAL;
        }
 
-       /* orig and donor should be different file */
-       if (orig_inode->i_ino == donor_inode->i_ino) {
-               ext4_debug("ext4 move extent: The argument files should not "
-                       "be same file [ino:orig %lu, donor %lu]\n",
-                       orig_inode->i_ino, donor_inode->i_ino);
-               return -EINVAL;
-       }
-
        /* Ext4 move extent supports only extent based file */
-       if (!(EXT4_I(orig_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       if (!(ext4_test_inode_flag(orig_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: orig file is not extents "
                        "based file [ino:orig %lu]\n", orig_inode->i_ino);
                return -EOPNOTSUPP;
-       } else if (!(EXT4_I(donor_inode)->i_flags & EXT4_EXTENTS_FL)) {
+       } else if (!(ext4_test_inode_flag(donor_inode, EXT4_INODE_EXTENTS))) {
                ext4_debug("ext4 move extent: donor file is not extents "
                        "based file [ino:donor %lu]\n", donor_inode->i_ino);
                return -EOPNOTSUPP;
@@ -972,19 +1003,12 @@ mext_check_arguments(struct inode *orig_inode,
                return -EINVAL;
        }
 
-       if (moved_len) {
-               ext4_debug("ext4 move extent: moved_len should be 0 "
-                       "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
-                       donor_inode->i_ino);
-               return -EINVAL;
-       }
-
-       if ((orig_start > MAX_DEFRAG_SIZE) ||
-           (donor_start > MAX_DEFRAG_SIZE) ||
-           (*len > MAX_DEFRAG_SIZE) ||
-           (orig_start + *len > MAX_DEFRAG_SIZE))  {
-               ext4_debug("ext4 move extent: Can't handle over [%lu] blocks "
-                       "[ino:orig %lu, donor %lu]\n", MAX_DEFRAG_SIZE,
+       if ((orig_start >= EXT_MAX_BLOCKS) ||
+           (donor_start >= EXT_MAX_BLOCKS) ||
+           (*len > EXT_MAX_BLOCKS) ||
+           (orig_start + *len >= EXT_MAX_BLOCKS))  {
+               ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
+                       "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
                        orig_inode->i_ino, donor_inode->i_ino);
                return -EINVAL;
        }
@@ -1035,7 +1059,7 @@ mext_check_arguments(struct inode *orig_inode,
        }
 
        if (!*len) {
-               ext4_debug("ext4 move extent: len shoudld not be 0 "
+               ext4_debug("ext4 move extent: len should not be 0 "
                        "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
                        donor_inode->i_ino);
                return -EINVAL;
@@ -1050,18 +1074,23 @@ mext_check_arguments(struct inode *orig_inode,
  * @inode1:    the inode structure
  * @inode2:    the inode structure
  *
- * Lock two inodes' i_mutex by i_ino order. This function is moved from
- * fs/inode.c.
+ * Lock two inodes' i_mutex by i_ino order.
+ * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
  */
-static void
+static int
 mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
 {
-       if (inode1 == NULL || inode2 == NULL || inode1 == inode2) {
-               if (inode1)
-                       mutex_lock(&inode1->i_mutex);
-               else if (inode2)
-                       mutex_lock(&inode2->i_mutex);
-               return;
+       int ret = 0;
+
+       BUG_ON(inode1 == NULL && inode2 == NULL);
+
+       ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
+       if (ret < 0)
+               goto out;
+
+       if (inode1 == inode2) {
+               mutex_lock(&inode1->i_mutex);
+               goto out;
        }
 
        if (inode1->i_ino < inode2->i_ino) {
@@ -1071,6 +1100,9 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
                mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT);
                mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD);
        }
+
+out:
+       return ret;
 }
 
 /**
@@ -1079,17 +1111,28 @@ mext_inode_double_lock(struct inode *inode1, struct inode *inode2)
  * @inode1:     the inode that is released first
  * @inode2:     the inode that is released second
  *
- * This function is moved from fs/inode.c.
+ * If inode1 or inode2 is NULL, return -EIO. Otherwise, return 0.
  */
 
-static void
+static int
 mext_inode_double_unlock(struct inode *inode1, struct inode *inode2)
 {
+       int ret = 0;
+
+       BUG_ON(inode1 == NULL && inode2 == NULL);
+
+       ret = mext_check_null_inode(inode1, inode2, __func__, __LINE__);
+       if (ret < 0)
+               goto out;
+
        if (inode1)
                mutex_unlock(&inode1->i_mutex);
 
        if (inode2 && inode2 != inode1)
                mutex_unlock(&inode2->i_mutex);
+
+out:
+       return ret;
 }
 
 /**
@@ -1146,70 +1189,92 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
        ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
        ext4_lblk_t rest_blocks;
        pgoff_t orig_page_offset = 0, seq_end_page;
-       int ret, depth, last_extent = 0;
+       int ret1, ret2, depth, last_extent = 0;
        int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
        int data_offset_in_page;
        int block_len_in_page;
        int uninit;
 
-       /* protect orig and donor against a truncate */
-       mext_inode_double_lock(orig_inode, donor_inode);
+       /* orig and donor should be different file */
+       if (orig_inode->i_ino == donor_inode->i_ino) {
+               ext4_debug("ext4 move extent: The argument files should not "
+                       "be same file [ino:orig %lu, donor %lu]\n",
+                       orig_inode->i_ino, donor_inode->i_ino);
+               return -EINVAL;
+       }
+
+       /* Regular file check */
+       if (!S_ISREG(orig_inode->i_mode) || !S_ISREG(donor_inode->i_mode)) {
+               ext4_debug("ext4 move extent: The argument files should be "
+                       "regular file [ino:orig %lu, donor %lu]\n",
+                       orig_inode->i_ino, donor_inode->i_ino);
+               return -EINVAL;
+       }
+
+       /* Protect orig and donor inodes against a truncate */
+       ret1 = mext_inode_double_lock(orig_inode, donor_inode);
+       if (ret1 < 0)
+               return ret1;
 
-       mext_double_down_read(orig_inode, donor_inode);
+       /* Protect extent tree against block allocations via delalloc */
+       double_down_write_data_sem(orig_inode, donor_inode);
        /* Check the filesystem environment whether move_extent can be done */
-       ret = mext_check_arguments(orig_inode, donor_inode, orig_start,
-                                       donor_start, &len, *moved_len);
-       mext_double_up_read(orig_inode, donor_inode);
-       if (ret)
-               goto out2;
+       ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
+                                   donor_start, &len);
+       if (ret1)
+               goto out;
 
        file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits;
        block_end = block_start + len - 1;
        if (file_end < block_end)
                len -= block_end - file_end;
 
-       ret = get_ext_path(orig_inode, block_start, &orig_path);
-       if (orig_path == NULL)
-               goto out2;
+       ret1 = get_ext_path(orig_inode, block_start, &orig_path);
+       if (ret1)
+               goto out;
 
        /* Get path structure to check the hole */
-       ret = get_ext_path(orig_inode, block_start, &holecheck_path);
-       if (holecheck_path == NULL)
+       ret1 = get_ext_path(orig_inode, block_start, &holecheck_path);
+       if (ret1)
                goto out;
 
        depth = ext_depth(orig_inode);
        ext_cur = holecheck_path[depth].p_ext;
-       if (ext_cur == NULL) {
-               ret = -EINVAL;
-               goto out;
-       }
 
        /*
-        * Get proper extent whose ee_block is beyond block_start
-        * if block_start was within the hole.
+        * Get proper starting location of block replacement if block_start was
+        * within the hole.
         */
        if (le32_to_cpu(ext_cur->ee_block) +
                ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
+               /*
+                * The hole exists between extents or at the tail of
+                * the original file.
+                */
                last_extent = mext_next_extent(orig_inode,
                                        holecheck_path, &ext_cur);
                if (last_extent < 0) {
-                       ret = last_extent;
+                       ret1 = last_extent;
                        goto out;
                }
                last_extent = mext_next_extent(orig_inode, orig_path,
                                                        &ext_dummy);
                if (last_extent < 0) {
-                       ret = last_extent;
+                       ret1 = last_extent;
                        goto out;
                }
-       }
-       seq_start = block_start;
+               seq_start = le32_to_cpu(ext_cur->ee_block);
+       } else if (le32_to_cpu(ext_cur->ee_block) > block_start)
+               /* The hole exists at the beginning of the original file. */
+               seq_start = le32_to_cpu(ext_cur->ee_block);
+       else
+               seq_start = block_start;
 
        /* No blocks within the specified range. */
        if (le32_to_cpu(ext_cur->ee_block) > block_end) {
                ext4_debug("ext4 move extent: The specified range of file "
                                                        "may be the hole\n");
-               ret = -EINVAL;
+               ret1 = -EINVAL;
                goto out;
        }
 
@@ -1229,7 +1294,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                last_extent = mext_next_extent(orig_inode, holecheck_path,
                                                &ext_cur);
                if (last_extent < 0) {
-                       ret = last_extent;
+                       ret1 = last_extent;
                        break;
                }
                add_blocks = ext4_ext_get_actual_len(ext_cur);
@@ -1269,29 +1334,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                seq_start = le32_to_cpu(ext_cur->ee_block);
                rest_blocks = seq_blocks;
 
-               /* Discard preallocations of two inodes */
-               down_write(&EXT4_I(orig_inode)->i_data_sem);
-               ext4_discard_preallocations(orig_inode);
-               up_write(&EXT4_I(orig_inode)->i_data_sem);
-
-               down_write(&EXT4_I(donor_inode)->i_data_sem);
-               ext4_discard_preallocations(donor_inode);
-               up_write(&EXT4_I(donor_inode)->i_data_sem);
+               /*
+                * Up the semaphores to avoid the following problems:
+                * a. transaction deadlock among ext4_journal_start,
+                *    ->write_begin via pagefault, and jbd2_journal_commit
+                * b. racing with ->readpage, ->write_begin, and ext4_get_block
+                *    in move_extent_per_page
+                */
+               double_up_write_data_sem(orig_inode, donor_inode);
 
                while (orig_page_offset <= seq_end_page) {
 
                        /* Swap original branches with new branches */
-                       ret = move_extent_per_page(o_filp, donor_inode,
+                       block_len_in_page = move_extent_per_page(
+                                               o_filp, donor_inode,
                                                orig_page_offset,
                                                data_offset_in_page,
-                                               block_len_in_page, uninit);
-                       if (ret < 0)
-                               goto out;
-                       orig_page_offset++;
+                                               block_len_in_page, uninit,
+                                               &ret1);
+
                        /* Count how many blocks we have exchanged */
                        *moved_len += block_len_in_page;
-                       BUG_ON(*moved_len > len);
+                       if (ret1 < 0)
+                               break;
+                       if (*moved_len > len) {
+                               EXT4_ERROR_INODE(orig_inode,
+                                       "We replaced blocks too much! "
+                                       "sum of replaced: %llu requested: %llu",
+                                       *moved_len, len);
+                               ret1 = -EIO;
+                               break;
+                       }
 
+                       orig_page_offset++;
                        data_offset_in_page = 0;
                        rest_blocks -= block_len_in_page;
                        if (rest_blocks > blocks_per_page)
@@ -1300,19 +1375,23 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
                                block_len_in_page = rest_blocks;
                }
 
+               double_down_write_data_sem(orig_inode, donor_inode);
+               if (ret1 < 0)
+                       break;
+
                /* Decrease buffer counter */
                if (holecheck_path)
                        ext4_ext_drop_refs(holecheck_path);
-               ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
-               if (holecheck_path == NULL)
+               ret1 = get_ext_path(orig_inode, seq_start, &holecheck_path);
+               if (ret1)
                        break;
                depth = holecheck_path->p_depth;
 
                /* Decrease buffer counter */
                if (orig_path)
                        ext4_ext_drop_refs(orig_path);
-               ret = get_ext_path(orig_inode, seq_start, &orig_path);
-               if (orig_path == NULL)
+               ret1 = get_ext_path(orig_inode, seq_start, &orig_path);
+               if (ret1)
                        break;
 
                ext_cur = holecheck_path[depth].p_ext;
@@ -1321,6 +1400,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
 
        }
 out:
+       if (*moved_len) {
+               ext4_discard_preallocations(orig_inode);
+               ext4_discard_preallocations(donor_inode);
+       }
+
        if (orig_path) {
                ext4_ext_drop_refs(orig_path);
                kfree(orig_path);
@@ -1329,11 +1413,13 @@ out:
                ext4_ext_drop_refs(holecheck_path);
                kfree(holecheck_path);
        }
-out2:
-       mext_inode_double_unlock(orig_inode, donor_inode);
+       double_up_write_data_sem(orig_inode, donor_inode);
+       ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
 
-       if (ret)
-               return ret;
+       if (ret1)
+               return ret1;
+       else if (ret2)
+               return ret2;
 
        return 0;
 }