VM: add "vm_mmap()" helper function

[linux-flexiantxendom0-3.2.10.git] / mm / mmap.c
diff --git a/mm/mmap.c b/mm/mmap.c

index 4fb5464..b38b47e 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -22,7 +22,7 @@
  #include <linux/security.h>
  #include <linux/hugetlb.h>
  #include <linux/profile.h>
-#include <linux/module.h>
+#include <linux/export.h>
  #include <linux/mount.h>
  #include <linux/mempolicy.h>
  #include <linux/rmap.h>
@@ -122,9 +122,17 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                 return 0;
  
         if (sysctl_overcommit_memory == OVERCOMMIT_GUESS) {
-               unsigned long n;
+               free = global_page_state(NR_FREE_PAGES);
+               free += global_page_state(NR_FILE_PAGES);
+
+               /*
+                * shmem pages shouldn't be counted as free in this
+                * case: they can't be purged, only swapped out, and
+                * that won't affect the overall amount of available
+                * memory in the system.
+                */
+               free -= global_page_state(NR_SHMEM);
  
-               free = global_page_state(NR_FILE_PAGES);
                 free += nr_swap_pages;
  
                 /*
@@ -136,34 +144,18 @@ int __vm_enough_memory(struct mm_struct *mm, long pages, int cap_sys_admin)
                 free += global_page_state(NR_SLAB_RECLAIMABLE);
  
                 /*
-                * Leave the last 3% for root
-                */
-               if (!cap_sys_admin)
-                       free -= free / 32;
-
-               if (free > pages)
-                       return 0;
-
-               /*
-                * nr_free_pages() is very expensive on large systems,
-                * only call if we're about to fail.
-                */
-               n = nr_free_pages();
-
-               /*
                  * Leave reserved pages. The pages are not for anonymous pages.
                  */
-               if (n <= totalreserve_pages)
+               if (free <= totalreserve_pages)
                         goto error;
                 else
-                       n -= totalreserve_pages;
+                       free -= totalreserve_pages;
  
                 /*
                  * Leave the last 3% for root
                  */
                 if (!cap_sys_admin)
-                       n -= n / 32;
-               free += n;
+                       free -= free / 32;
  
                 if (free > pages)
                         return 0;
@@ -194,7 +186,7 @@ error:
  }
  
  /*
- * Requires inode->i_mapping->i_mmap_lock
+ * Requires inode->i_mapping->i_mmap_mutex
   */
  static void __remove_shared_vm_struct(struct vm_area_struct *vma,
                 struct file *file, struct address_space *mapping)
@@ -222,9 +214,9 @@ void unlink_file_vma(struct vm_area_struct *vma)
  
         if (file) {
                 struct address_space *mapping = file->f_mapping;
-               spin_lock(&mapping->i_mmap_lock);
+               mutex_lock(&mapping->i_mmap_mutex);
                 __remove_shared_vm_struct(vma, file, mapping);
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
         }
  }
  
@@ -248,6 +240,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
         return next;
  }
  
+static unsigned long do_brk(unsigned long addr, unsigned long len);
+
  SYSCALL_DEFINE1(brk, unsigned long, brk)
  {
         unsigned long rlim, retval;
@@ -398,29 +392,6 @@ find_vma_prepare(struct mm_struct *mm, unsigned long addr,
         return vma;
  }
  
-static inline void
-__vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
-               struct vm_area_struct *prev, struct rb_node *rb_parent)
-{
-       struct vm_area_struct *next;
-
-       vma->vm_prev = prev;
-       if (prev) {
-               next = prev->vm_next;
-               prev->vm_next = vma;
-       } else {
-               mm->mmap = vma;
-               if (rb_parent)
-                       next = rb_entry(rb_parent,
-                                       struct vm_area_struct, vm_rb);
-               else
-                       next = NULL;
-       }
-       vma->vm_next = next;
-       if (next)
-               next->vm_prev = vma;
-}
-
  void __vma_link_rb(struct mm_struct *mm, struct vm_area_struct *vma,
                 struct rb_node **rb_link, struct rb_node *rb_parent)
  {
@@ -468,25 +439,22 @@ static void vma_link(struct mm_struct *mm, struct vm_area_struct *vma,
         if (vma->vm_file)
                 mapping = vma->vm_file->f_mapping;
  
-       if (mapping) {
-               spin_lock(&mapping->i_mmap_lock);
-               vma->vm_truncate_count = mapping->truncate_count;
-       }
+       if (mapping)
+               mutex_lock(&mapping->i_mmap_mutex);
  
         __vma_link(mm, vma, prev, rb_link, rb_parent);
         __vma_link_file(vma);
  
         if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
  
         mm->map_count++;
         validate_mm(mm);
  }
  
  /*
- * Helper for vma_adjust in the split_vma insert case:
- * insert vm structure into list and rbtree and anon_vma,
- * but it has already been inserted into prio_tree earlier.
+ * Helper for vma_adjust() in the split_vma insert case: insert a vma into the
+ * mm's list and rbtree.  It has already been inserted into the prio_tree.
   */
  static void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vma)
  {
@@ -580,17 +548,8 @@ again:                     remove_next = 1 + (end > next->vm_end);
                 mapping = file->f_mapping;
                 if (!(vma->vm_flags & VM_NONLINEAR))
                         root = &mapping->i_mmap;
-               spin_lock(&mapping->i_mmap_lock);
-               if (importer &&
-                   vma->vm_truncate_count != next->vm_truncate_count) {
-                       /*
-                        * unmap_mapping_range might be in progress:
-                        * ensure that the expanding vma is rescanned.
-                        */
-                       importer->vm_truncate_count = 0;
-               }
+               mutex_lock(&mapping->i_mmap_mutex);
                 if (insert) {
-                       insert->vm_truncate_count = vma->vm_truncate_count;
                         /*
                          * Put into prio_tree now, so instantiated pages
                          * are visible to arm/parisc __flush_dcache_page
@@ -656,7 +615,7 @@ again:                      remove_next = 1 + (end > next->vm_end);
         if (anon_vma)
                 anon_vma_unlock(anon_vma);
         if (mapping)
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
  
         if (remove_next) {
                 if (file) {
@@ -940,14 +899,7 @@ struct anon_vma *find_mergeable_anon_vma(struct vm_area_struct *vma)
         if (anon_vma)
                 return anon_vma;
  try_prev:
-       /*
-        * It is potentially slow to have to call find_vma_prev here.
-        * But it's only on the first write fault on the vma, not
-        * every time, and we could devise a way to avoid it later
-        * (e.g. stash info in next's anon_vma_node when assigning
-        * an anon_vma, or when trying vma_merge).  Another time.
-        */
-       BUG_ON(find_vma_prev(vma->vm_mm, vma->vm_start, &near) != vma);
+       near = vma->vm_prev;
         if (!near)
                 goto none;
  
@@ -985,16 +937,29 @@ void vm_stat_account(struct mm_struct *mm, unsigned long flags,
  #endif /* CONFIG_PROC_FS */
  
  /*
+ * Mask the hint with PAGE_MASK.  A non-NULL hint that is then still below
+ * mmap_min_addr is replaced by PAGE_ALIGN(mmap_min_addr).
+ */
+static inline unsigned long round_hint_to_min(unsigned long hint)
+{
+       hint &= PAGE_MASK;
+       if (((void *)hint != NULL) &&
+           (hint < mmap_min_addr))
+               return PAGE_ALIGN(mmap_min_addr);
+       return hint;    /* NULL, or already at/above mmap_min_addr */
+}
+
+/*
   * The caller must hold down_write(&current->mm->mmap_sem).
   */
  
-unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+static unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
                         unsigned long len, unsigned long prot,
                         unsigned long flags, unsigned long pgoff)
  {
         struct mm_struct * mm = current->mm;
         struct inode *inode;
-       unsigned int vm_flags;
+       vm_flags_t vm_flags;
         int error;
         unsigned long reqprot = prot;
  
@@ -1124,7 +1089,32 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
  
         return mmap_region(file, addr, len, flags, vm_flags, pgoff);
  }
-EXPORT_SYMBOL(do_mmap_pgoff);
+
+unsigned long do_mmap(struct file *file, unsigned long addr,
+       unsigned long len, unsigned long prot,
+       unsigned long flag, unsigned long offset)
+{      /* takes a byte offset; do_mmap_pgoff() is handed offset >> PAGE_SHIFT */
+       if (unlikely(offset + PAGE_ALIGN(len) < offset))        /* sum wraps */
+               return -EINVAL;
+       if (unlikely(offset & ~PAGE_MASK))      /* offset bits outside PAGE_MASK */
+               return -EINVAL;
+       return do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
+}
+EXPORT_SYMBOL(do_mmap);
+
+unsigned long vm_mmap(struct file *file, unsigned long addr,
+       unsigned long len, unsigned long prot,
+       unsigned long flag, unsigned long offset)
+{      /* do_mmap() with current->mm->mmap_sem taken and released here */
+       unsigned long ret;
+       struct mm_struct *mm = current->mm;
+
+       down_write(&mm->mmap_sem);      /* write-held across do_mmap() */
+       ret = do_mmap(file, addr, len, prot, flag, offset);
+       up_write(&mm->mmap_sem);
+       return ret;     /* do_mmap() result, passed through unchanged */
+}
+EXPORT_SYMBOL(vm_mmap);
  
  SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                 unsigned long, prot, unsigned long, flags,
@@ -1148,9 +1138,9 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
                  * A dummy user value is used because we are not locking
                  * memory so no accounting is necessary
                  */
-               len = ALIGN(len, huge_page_size(&default_hstate));
-               file = hugetlb_file_setup(HUGETLB_ANON_FILE, len, VM_NORESERVE,
-                                               &user, HUGETLB_ANONHUGE_INODE);
+               file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len,
+                                               VM_NORESERVE, &user,
+                                               HUGETLB_ANONHUGE_INODE);
                 if (IS_ERR(file))
                         return PTR_ERR(file);
         }
@@ -1199,7 +1189,7 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
   */
  int vma_wants_writenotify(struct vm_area_struct *vma)
  {
-       unsigned int vm_flags = vma->vm_flags;
+       vm_flags_t vm_flags = vma->vm_flags;
  
         /* If it was private or non-writable, the write bit is already clear */
         if ((vm_flags & (VM_WRITE|VM_SHARED)) != ((VM_WRITE|VM_SHARED)))
@@ -1227,7 +1217,7 @@ int vma_wants_writenotify(struct vm_area_struct *vma)
   * We account for memory if it's a private writeable mapping,
   * not hugepages and VM_NORESERVE wasn't set.
   */
-static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
+static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags)
  {
         /*
          * hugetlb has its own accounting separate from the core VM
@@ -1241,7 +1231,7 @@ static inline int accountable_mapping(struct file *file, unsigned int vm_flags)
  
  unsigned long mmap_region(struct file *file, unsigned long addr,
                           unsigned long len, unsigned long flags,
-                         unsigned int vm_flags, unsigned long pgoff)
+                         vm_flags_t vm_flags, unsigned long pgoff)
  {
         struct mm_struct *mm = current->mm;
         struct vm_area_struct *vma, *prev;
@@ -1284,7 +1274,7 @@ munmap_back:
          */
         if (accountable_mapping(file, vm_flags)) {
                 charged = len >> PAGE_SHIFT;
-               if (security_vm_enough_memory(charged))
+               if (security_vm_enough_memory_mm(mm, charged))
                         return -ENOMEM;
                 vm_flags |= VM_ACCOUNT;
         }
@@ -1315,8 +1305,9 @@ munmap_back:
         vma->vm_pgoff = pgoff;
         INIT_LIST_HEAD(&vma->anon_vma_chain);
  
+       error = -EINVAL;        /* when rejecting VM_GROWSDOWN|VM_GROWSUP */
+
         if (file) {
-               error = -EINVAL;
                 if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
                         goto free_vma;
                 if (vm_flags & VM_DENYWRITE) {
@@ -1342,6 +1333,8 @@ munmap_back:
                 pgoff = vma->vm_pgoff;
                 vm_flags = vma->vm_flags;
         } else if (vm_flags & VM_SHARED) {
+               if (unlikely(vm_flags & (VM_GROWSDOWN|VM_GROWSUP)))
+                       goto free_vma;
                 error = shmem_zero_setup(vma);
                 if (error)
                         goto free_vma;
@@ -1472,10 +1465,8 @@ void arch_unmap_area(struct mm_struct *mm, unsigned long addr)
         /*
          * Is this a new hole at the lowest possible address?
          */
-       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache) {
+       if (addr >= TASK_UNMAPPED_BASE && addr < mm->free_area_cache)
                 mm->free_area_cache = addr;
-               mm->cached_hole_size = ~0UL;
-       }
  }
  
  /*
@@ -1490,7 +1481,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
  {
         struct vm_area_struct *vma;
         struct mm_struct *mm = current->mm;
-       unsigned long addr = addr0;
+       unsigned long addr = addr0, start_addr;
  
         /* requested length too big for entire address space */
         if (len > TASK_SIZE)
@@ -1514,22 +1505,14 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                 mm->free_area_cache = mm->mmap_base;
         }
  
+try_again:
         /* either no address requested or can't fit in requested address hole */
-       addr = mm->free_area_cache;
+       start_addr = addr = mm->free_area_cache;
  
-       /* make sure it can fit in the remaining address space */
-       if (addr > len) {
-               vma = find_vma(mm, addr-len);
-               if (!vma || addr <= vma->vm_start)
-                       /* remember the address as a hint for next time */
-                       return (mm->free_area_cache = addr-len);
-       }
-
-       if (mm->mmap_base < len)
-               goto bottomup;
-
-       addr = mm->mmap_base-len;
+       if (addr < len)
+               goto fail;
  
+       addr -= len;
         do {
                 /*
                  * Lookup failure means no vma is above this address,
@@ -1549,7 +1532,21 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
                 addr = vma->vm_start-len;
         } while (len < vma->vm_start);
  
-bottomup:
+fail:
+       /*
+        * If the cached hint left no room for the requested
+        * mapping, retry once from mmap_base:
+        *
+        * Note: unlike the bottom-up case, which does a full
+        * linear search, this loop uses find_vma() and can
+        * therefore skip over some holes.
+        */
+       if (start_addr != mm->mmap_base) {
+               mm->free_area_cache = mm->mmap_base;
+               mm->cached_hole_size = 0;
+               goto try_again;
+       }
+
         /*
          * A failed mmap() very likely causes application failure,
          * so fall back to the bottom-up function here. This scenario
@@ -1652,39 +1649,27 @@ struct vm_area_struct *find_vma(struct mm_struct *mm, unsigned long addr)
  
  EXPORT_SYMBOL(find_vma);
  
-/* Same as find_vma, but also return a pointer to the previous VMA in *pprev. */
+/*
+ * Same as find_vma, but also return a pointer to the previous VMA in *pprev.
+ */
  struct vm_area_struct *
  find_vma_prev(struct mm_struct *mm, unsigned long addr,
                         struct vm_area_struct **pprev)
  {
-       struct vm_area_struct *vma = NULL, *prev = NULL;
-       struct rb_node *rb_node;
-       if (!mm)
-               goto out;
-
-       /* Guard against addr being lower than the first VMA */
-       vma = mm->mmap;
-
-       /* Go through the RB tree quickly. */
-       rb_node = mm->mm_rb.rb_node;
-
-       while (rb_node) {
-               struct vm_area_struct *vma_tmp;
-               vma_tmp = rb_entry(rb_node, struct vm_area_struct, vm_rb);
+       struct vm_area_struct *vma;
  
-               if (addr < vma_tmp->vm_end) {
-                       rb_node = rb_node->rb_left;
-               } else {
-                       prev = vma_tmp;
-                       if (!prev->vm_next || (addr < prev->vm_next->vm_end))
-                               break;
+       vma = find_vma(mm, addr);
+       if (vma) {
+               *pprev = vma->vm_prev;
+       } else {
+               struct rb_node *rb_node = mm->mm_rb.rb_node;
+               *pprev = NULL;
+               while (rb_node) {
+                       *pprev = rb_entry(rb_node, struct vm_area_struct, vm_rb);
                         rb_node = rb_node->rb_right;
                 }
         }
-
-out:
-       *pprev = prev;
-       return prev ? prev->vm_next : vma;
+       return vma;
  }
  
  /*
@@ -1797,7 +1782,7 @@ int expand_upwards(struct vm_area_struct *vma, unsigned long address)
  /*
   * vma is the first one with address < vma->vm_start.  Have to extend vma.
   */
-static int expand_downwards(struct vm_area_struct *vma,
+int expand_downwards(struct vm_area_struct *vma,
                                    unsigned long address)
  {
         int error;
@@ -1844,11 +1829,6 @@ static int expand_downwards(struct vm_area_struct *vma,
         return error;
  }
  
-int expand_stack_downwards(struct vm_area_struct *vma, unsigned long address)
-{
-       return expand_downwards(vma, address);
-}
-
  #ifdef CONFIG_STACK_GROWSUP
  int expand_stack(struct vm_area_struct *vma, unsigned long address)
  {
@@ -1931,17 +1911,17 @@ static void unmap_region(struct mm_struct *mm,
                 unsigned long start, unsigned long end)
  {
         struct vm_area_struct *next = prev? prev->vm_next: mm->mmap;
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
         unsigned long nr_accounted = 0;
  
         lru_add_drain();
-       tlb = tlb_gather_mmu(mm, 0);
+       tlb_gather_mmu(&tlb, mm, 0);
         update_hiwater_rss(mm);
         unmap_vmas(&tlb, vma, start, end, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
-       free_pgtables(tlb, vma, prev? prev->vm_end: FIRST_USER_ADDRESS,
-                                next? next->vm_start: 0);
-       tlb_finish_mmu(tlb, start, end);
+       free_pgtables(&tlb, vma, prev ? prev->vm_end : FIRST_USER_ADDRESS,
+                                next ? next->vm_start : 0);
+       tlb_finish_mmu(&tlb, start, end);
  }
  
  /*
@@ -2083,9 +2063,10 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
                 return -EINVAL;
  
         /* Find the first overlapping VMA */
-       vma = find_vma_prev(mm, start, &prev);
+       vma = find_vma(mm, start);
         if (!vma)
                 return 0;
+       prev = vma->vm_prev;
         /* we have  start < vma->vm_end  */
  
         /* if it doesn't overlap, we have nothing.. */
@@ -2151,21 +2132,24 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len)
  
         return 0;
  }
-
  EXPORT_SYMBOL(do_munmap);
  
-SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+int vm_munmap(struct mm_struct *mm, unsigned long start, size_t len)
  {
         int ret;
-       struct mm_struct *mm = current->mm;
-
-       profile_munmap(addr);
  
         down_write(&mm->mmap_sem);
-       ret = do_munmap(mm, addr, len);
+       ret = do_munmap(mm, start, len);
         up_write(&mm->mmap_sem);
         return ret;
  }
+EXPORT_SYMBOL(vm_munmap);
+
+SYSCALL_DEFINE2(munmap, unsigned long, addr, size_t, len)
+{
+       profile_munmap(addr);   /* runs before vm_munmap() takes mmap_sem */
+       return vm_munmap(current->mm, addr, len);       /* locks mmap_sem itself */
+}
  
  static inline void verify_mm_writelocked(struct mm_struct *mm)
  {
@@ -2182,7 +2166,7 @@ static inline void verify_mm_writelocked(struct mm_struct *mm)
   *  anonymous maps.  eventually we may be able to do some
   *  brk-specific accounting here.
   */
-unsigned long do_brk(unsigned long addr, unsigned long len)
+static unsigned long do_brk(unsigned long addr, unsigned long len)
  {
         struct mm_struct * mm = current->mm;
         struct vm_area_struct * vma, * prev;
@@ -2242,7 +2226,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
         if (mm->map_count > sysctl_max_map_count)
                 return -ENOMEM;
  
-       if (security_vm_enough_memory(len >> PAGE_SHIFT))
+       if (security_vm_enough_memory_mm(mm, len >> PAGE_SHIFT))
                 return -ENOMEM;
  
         /* Can we just expand an old private anonymous mapping? */
@@ -2278,15 +2262,24 @@ out:
         return addr;
  }
  
-EXPORT_SYMBOL(do_brk);
+unsigned long vm_brk(unsigned long addr, unsigned long len)
+{      /* do_brk() with current->mm->mmap_sem taken and released here */
+       struct mm_struct *mm = current->mm;
+       unsigned long ret;
+
+       down_write(&mm->mmap_sem);      /* write-held across do_brk() */
+       ret = do_brk(addr, len);
+       up_write(&mm->mmap_sem);
+       return ret;     /* do_brk() result, passed through unchanged */
+}
+EXPORT_SYMBOL(vm_brk);
  
  /* Release all mmaps. */
  void exit_mmap(struct mm_struct *mm)
  {
-       struct mmu_gather *tlb;
+       struct mmu_gather tlb;
         struct vm_area_struct *vma;
         unsigned long nr_accounted = 0;
-       unsigned long end;
  
         /* mm's last user has gone, and its about to be pulled down */
         mmu_notifier_release(mm);
@@ -2308,14 +2301,14 @@ void exit_mmap(struct mm_struct *mm)
  
         lru_add_drain();
         flush_cache_mm(mm);
-       tlb = tlb_gather_mmu(mm, 1);
+       tlb_gather_mmu(&tlb, mm, 1);
         /* update_hiwater_rss(mm) here? but nobody should be looking */
         /* Use -1 here to ensure all VMAs in the mm are unmapped */
-       end = unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
+       unmap_vmas(&tlb, vma, 0, -1, &nr_accounted, NULL);
         vm_unacct_memory(nr_accounted);
  
-       free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
-       tlb_finish_mmu(tlb, 0, end);
+       free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, 0);
+       tlb_finish_mmu(&tlb, 0, -1);
  
         /*
          * Walk the list again, actually closing and freeing it,
@@ -2329,7 +2322,7 @@ void exit_mmap(struct mm_struct *mm)
  
  /* Insert vm structure into process list sorted by address
   * and into the inode's i_mmap tree.  If vm_file is non-NULL
- * then i_mmap_lock is taken here.
+ * then i_mmap_mutex is taken here.
   */
  int insert_vm_struct(struct mm_struct * mm, struct vm_area_struct * vma)
  {
@@ -2375,13 +2368,16 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
         struct vm_area_struct *new_vma, *prev;
         struct rb_node **rb_link, *rb_parent;
         struct mempolicy *pol;
+       bool faulted_in_anon_vma = true;
  
         /*
          * If anonymous vma has not yet been faulted, update new pgoff
          * to match new location, to increase its chance of merging.
          */
-       if (!vma->vm_file && !vma->anon_vma)
+       if (unlikely(!vma->vm_file && !vma->anon_vma)) {
                 pgoff = addr >> PAGE_SHIFT;
+               faulted_in_anon_vma = false;
+       }
  
         find_vma_prepare(mm, addr, &prev, &rb_link, &rb_parent);
         new_vma = vma_merge(mm, prev, addr, addr + len, vma->vm_flags,
@@ -2390,9 +2386,24 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
                 /*
                  * Source vma may have been merged into new_vma
                  */
-               if (vma_start >= new_vma->vm_start &&
-                   vma_start < new_vma->vm_end)
+               if (unlikely(vma_start >= new_vma->vm_start &&
+                            vma_start < new_vma->vm_end)) {
+                       /*
+                        * The only way we can get a vma_merge with
+                        * self during an mremap is if the vma hasn't
+                        * been faulted in yet and we were allowed to
+                        * reset the dst vma->vm_pgoff to the
+                        * destination address of the mremap to allow
+                        * the merge to happen. mremap must change the
+                        * vm_pgoff linearity between src and dst vmas
+                        * (in turn preventing a vma_merge) to be
+                        * safe. It is only safe to keep the vm_pgoff
+                        * linear if there are no pages mapped yet.
+                        */
+                       VM_BUG_ON(faulted_in_anon_vma);
                         *vmap = new_vma;
+               } else
+                       anon_vma_moveto_tail(new_vma);
         } else {
                 new_vma = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
                 if (new_vma) {
@@ -2541,15 +2552,15 @@ static void vm_lock_anon_vma(struct mm_struct *mm, struct anon_vma *anon_vma)
                  * The LSB of head.next can't change from under us
                  * because we hold the mm_all_locks_mutex.
                  */
-               spin_lock_nest_lock(&anon_vma->root->lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&anon_vma->root->mutex, &mm->mmap_sem);
                 /*
                  * We can safely modify head.next after taking the
-                * anon_vma->root->lock. If some other vma in this mm shares
+                * anon_vma->root->mutex. If some other vma in this mm shares
                  * the same anon_vma we won't take it again.
                  *
                  * No need of atomic instructions here, head.next
                  * can't change from under us thanks to the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                  */
                 if (__test_and_set_bit(0, (unsigned long *)
                                        &anon_vma->root->head.next))
@@ -2571,7 +2582,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
                  */
                 if (test_and_set_bit(AS_MM_ALL_LOCKS, &mapping->flags))
                         BUG();
-               spin_lock_nest_lock(&mapping->i_mmap_lock, &mm->mmap_sem);
+               mutex_lock_nest_lock(&mapping->i_mmap_mutex, &mm->mmap_sem);
         }
  }
  
@@ -2598,7 +2609,7 @@ static void vm_lock_mapping(struct mm_struct *mm, struct address_space *mapping)
   * vma in this mm is backed by the same anon_vma or address_space.
   *
   * We can take all the locks in random order because the VM code
- * taking i_mmap_lock or anon_vma->lock outside the mmap_sem never
+ * taking i_mmap_mutex or anon_vma->mutex outside the mmap_sem never
   * takes more than one of them in a row. Secondly we're protected
   * against a concurrent mm_take_all_locks() by the mm_all_locks_mutex.
   *
@@ -2611,7 +2622,6 @@ int mm_take_all_locks(struct mm_struct *mm)
  {
         struct vm_area_struct *vma;
         struct anon_vma_chain *avc;
-       int ret = -EINTR;
  
         BUG_ON(down_read_trylock(&mm->mmap_sem));
  
@@ -2632,13 +2642,11 @@ int mm_take_all_locks(struct mm_struct *mm)
                                 vm_lock_anon_vma(mm, avc->anon_vma);
         }
  
-       ret = 0;
+       return 0;
  
  out_unlock:
-       if (ret)
-               mm_drop_all_locks(mm);
-
-       return ret;
+       mm_drop_all_locks(mm);
+       return -EINTR;
  }
  
  static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
@@ -2654,7 +2662,7 @@ static void vm_unlock_anon_vma(struct anon_vma *anon_vma)
                  *
                  * No need of atomic instructions here, head.next
                  * can't change from under us until we release the
-                * anon_vma->root->lock.
+                * anon_vma->root->mutex.
                  */
                 if (!__test_and_clear_bit(0, (unsigned long *)
                                           &anon_vma->root->head.next))
@@ -2670,7 +2678,7 @@ static void vm_unlock_mapping(struct address_space *mapping)
                  * AS_MM_ALL_LOCKS can't change to 0 from under us
                  * because we hold the mm_all_locks_mutex.
                  */
-               spin_unlock(&mapping->i_mmap_lock);
+               mutex_unlock(&mapping->i_mmap_mutex);
                 if (!test_and_clear_bit(AS_MM_ALL_LOCKS,
                                         &mapping->flags))
                         BUG();