- disable prio-tree feature for now (does not compile).
author Andreas Gruenbacher <agruen@suse.de>
Mon, 29 Mar 2004 17:03:13 +0000 (17:03 +0000)
committer Andreas Gruenbacher <agruen@suse.de>
Mon, 29 Mar 2004 17:03:13 +0000 (17:03 +0000)
suse-commit: 992fab34fc46bb23aa13ae1f112242fb65108d29

21 files changed:
fs/exec.c
fs/hugetlbfs/inode.c
fs/inode.c
fs/locks.c
fs/proc/task_mmu.c
include/linux/fs.h
include/linux/mm.h
include/linux/prio_tree.h [deleted file]
init/main.c
kernel/fork.c
mm/Makefile
mm/filemap.c
mm/fremap.c
mm/memory.c
mm/mmap.c
mm/mremap.c
mm/objrmap.c
mm/prio_tree.c [deleted file]
mm/shmem.c
mm/swap_state.c
mm/vmscan.c

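In summary, the patch replaces the radix priority search trees rooted at mapping->i_mmap and mapping->i_mmap_shared (together with the separate i_mmap_nonlinear list) by two plain lists linked through vma->shared, so every interval lookup degrades into a full list walk. A minimal sketch of the before/after iteration pattern, abbreviated from the hunks below (do_something() and the [first, last] page range are placeholders, not code from this patch):

    /* Before: range query over the priority search tree. */
    struct prio_tree_iter iter;
    struct vm_area_struct *vma;

    vma = __vma_prio_tree_first(&mapping->i_mmap, &iter, first, last);
    while (vma) {
            do_something(vma);      /* only overlapping vmas are visited */
            vma = __vma_prio_tree_next(vma, &mapping->i_mmap, &iter,
                            first, last);
    }

    /* After: walk every vma on the list, filtering by hand where the
     * range matters (compare invalidate_mmap_range_list() in mm/memory.c). */
    list_for_each_entry(vma, &mapping->i_mmap, shared) {
            unsigned long vlast = vma->vm_pgoff +
                    ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1;

            if (last < vma->vm_pgoff || vlast < first)
                    continue;
            do_something(vma);
    }
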
index b716efe..58d60e5 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -438,7 +438,7 @@ int setup_arg_pages(struct linux_binprm *bprm, int executable_stack)
                mpnt->vm_pgoff = mpnt->vm_start >> PAGE_SHIFT;
                mpnt->vm_file = NULL;
                mpol_set_vma_default(mpnt);
-               INIT_VMA_SHARED(mpnt);
+               INIT_LIST_HEAD(&mpnt->shared);
                /* insert_vm_struct takes care of anon_vma_node */
                mpnt->anon_vma = NULL;
                mpnt->vm_private_data = (void *) 0;
index 8721331..66919c1 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -265,13 +265,11 @@ static void hugetlbfs_drop_inode(struct inode *inode)
  * vma->vm_pgoff is in PAGE_SIZE units.
  */
 static void
-hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
+hugetlb_vmtruncate_list(struct list_head *list, unsigned long h_pgoff)
 {
        struct vm_area_struct *vma;
-       struct prio_tree_iter iter;
 
-       vma = __vma_prio_tree_first(root, &iter, h_pgoff, ULONG_MAX);
-       while (vma) {
+       list_for_each_entry(vma, list, shared) {
                unsigned long h_vm_pgoff;
                unsigned long v_length;
                unsigned long h_length;
@@ -303,8 +301,6 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
                zap_hugepage_range(vma,
                                vma->vm_start + v_offset,
                                v_length - v_offset);
-
-               vma = __vma_prio_tree_next(vma, root, &iter, h_pgoff, ULONG_MAX);
        }
 }
 
@@ -324,11 +320,9 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 
        inode->i_size = offset;
        down(&mapping->i_shared_sem);
-       /* Protect against page fault */
-       atomic_inc(&mapping->truncate_count);
-       if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+       if (!list_empty(&mapping->i_mmap))
                hugetlb_vmtruncate_list(&mapping->i_mmap, pgoff);
-       if (unlikely(!prio_tree_empty(&mapping->i_mmap_shared)))
+       if (!list_empty(&mapping->i_mmap_shared))
                hugetlb_vmtruncate_list(&mapping->i_mmap_shared, pgoff);
        up(&mapping->i_shared_sem);
        truncate_hugepages(mapping, offset);
index f2a2244..b3d65f4 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -186,9 +186,8 @@ void inode_init_once(struct inode *inode)
        atomic_set(&inode->i_data.truncate_count, 0);
        INIT_LIST_HEAD(&inode->i_data.private_list);
        spin_lock_init(&inode->i_data.private_lock);
-       INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap);
-       INIT_PRIO_TREE_ROOT(&inode->i_data.i_mmap_shared);
-       INIT_LIST_HEAD(&inode->i_data.i_mmap_nonlinear);
+       INIT_LIST_HEAD(&inode->i_data.i_mmap);
+       INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
        spin_lock_init(&inode->i_lock);
        i_size_ordered_init(inode);
 }
index 9ba8d16..c6a6010 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1455,8 +1455,8 @@ int fcntl_setlk(struct file *filp, unsigned int cmd, struct flock __user *l)
        if (IS_MANDLOCK(inode) &&
            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
                struct address_space *mapping = filp->f_mapping;
-               if (!prio_tree_empty(&mapping->i_mmap_shared) ||
-                       !list_empty(&mapping->i_mmap_nonlinear)) {
+
+               if (!list_empty(&mapping->i_mmap_shared)) {
                        error = -EAGAIN;
                        goto out;
                }
@@ -1593,8 +1593,8 @@ int fcntl_setlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l)
        if (IS_MANDLOCK(inode) &&
            (inode->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) {
                struct address_space *mapping = filp->f_mapping;
-               if (!prio_tree_empty(&mapping->i_mmap_shared) ||
-                       !list_empty(&mapping->i_mmap_nonlinear)) {
+
+               if (!list_empty(&mapping->i_mmap_shared)) {
                        error = -EAGAIN;
                        goto out;
                }
index 005cc0d..1a7440a 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -65,7 +65,7 @@ int task_statm(struct mm_struct *mm, int *shared, int *text,
                                *shared += pages;
                        continue;
                }
-               if (vma->vm_flags & VM_SHARED || !vma_shared_empty(vma))
+               if (vma->vm_flags & VM_SHARED || !list_empty(&vma->shared))
                        *shared += pages;
                if (vma->vm_flags & VM_EXECUTABLE)
                        *text += pages;
index b56ed2f..61f6f1d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -18,7 +18,6 @@
 #include <linux/stat.h>
 #include <linux/cache.h>
 #include <linux/radix-tree.h>
-#include <linux/prio_tree.h>
 #include <linux/kobject.h>
 #include <asm/atomic.h>
 
@@ -328,9 +327,8 @@ struct address_space {
        spinlock_t              tree_lock;      /* and spinlock protecting it */
        unsigned long           nrpages;        /* number of total pages */
        struct address_space_operations *a_ops; /* methods */
-       struct prio_tree_root   i_mmap;         /* tree of private mappings */
-       struct prio_tree_root   i_mmap_shared;  /* tree of shared mappings */
-       struct list_head        i_mmap_nonlinear;/*list of nonlinear mappings */
+       struct list_head        i_mmap;         /* list of private mappings */
+       struct list_head        i_mmap_shared;  /* list of shared mappings */
        struct semaphore        i_shared_sem;   /* protect both above lists */
        atomic_t                truncate_count; /* Cover race condition with truncate */
        unsigned long           dirtied_when;   /* jiffies of first page dirtying */
index 80dc497..81ef479 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -11,7 +11,6 @@
 #include <linux/list.h>
 #include <linux/mmzone.h>
 #include <linux/rbtree.h>
-#include <linux/prio_tree.h>
 #include <linux/fs.h>
 #include <linux/mempolicy.h>
 
@@ -88,28 +87,7 @@ struct vm_area_struct {
         * one of the address_space->i_mmap{,shared} lists,
         * for shm areas, the list of attaches, otherwise unused.
         */
-       union {
-               struct {
-                       struct list_head list;
-                       void *parent;
-               } vm_set;
-
-               struct prio_tree_node  prio_tree_node;
-
-               struct {
-                       void *first;
-                       void *second;
-                       void *parent;
-               } both;
-       } shared;
-
-       /*
-        * shared.vm_set : list of vmas that map exactly the same set of pages
-        * vm_set_head   : head of the vm_set list
-        *
-        * TODO: try to shove the following field into vm_private_data ??
-        */
-       struct vm_area_struct *vm_set_head;
+       struct list_head shared;
 
        /*
         * The same vma can be both queued into the i_mmap and in a
@@ -187,150 +165,6 @@ struct vm_area_struct {
 #define VM_RandomReadHint(v)           ((v)->vm_flags & VM_RAND_READ)
 
 /*
- * The following macros are used for implementing prio_tree for i_mmap{_shared}
- */
-
-#define        RADIX_INDEX(vma)  ((vma)->vm_pgoff)
-#define        VMA_SIZE(vma)     (((vma)->vm_end - (vma)->vm_start) >> PAGE_SHIFT)
-/* avoid overflow */
-#define        HEAP_INDEX(vma)   ((vma)->vm_pgoff + (VMA_SIZE(vma) - 1))
-
-#define GET_INDEX_VMA(vma, radix, heap)                \
-do {                                           \
-       radix = RADIX_INDEX(vma);               \
-       heap = HEAP_INDEX(vma);                 \
-} while (0)
-
-#define GET_INDEX(node, radix, heap)           \
-do {                                           \
-       struct vm_area_struct *__tmp =          \
-         prio_tree_entry(node, struct vm_area_struct, shared.prio_tree_node);\
-       GET_INDEX_VMA(__tmp, radix, heap);      \
-} while (0)
-
-#define        INIT_VMA_SHARED_LIST(vma)                       \
-do {                                                   \
-       INIT_LIST_HEAD(&(vma)->shared.vm_set.list);     \
-       (vma)->shared.vm_set.parent = NULL;             \
-       (vma)->vm_set_head = NULL;                      \
-} while (0)
-
-#define INIT_VMA_SHARED(vma)                   \
-do {                                           \
-       (vma)->shared.both.first = NULL;        \
-       (vma)->shared.both.second = NULL;       \
-       (vma)->shared.both.parent = NULL;       \
-       (vma)->vm_set_head = NULL;              \
-} while (0)
-
-extern void __vma_prio_tree_insert(struct prio_tree_root *,
-       struct vm_area_struct *);
-
-extern void __vma_prio_tree_remove(struct prio_tree_root *,
-       struct vm_area_struct *);
-
-static inline int vma_shared_empty(struct vm_area_struct *vma)
-{
-       return vma->shared.both.first == NULL;
-}
-
-/*
- * Helps to add a new vma that maps the same (identical) set of pages as the
- * old vma to an i_mmap tree.
- */
-static inline void __vma_prio_tree_add(struct vm_area_struct *vma,
-       struct vm_area_struct *old)
-{
-       INIT_VMA_SHARED_LIST(vma);
-
-       /* Leave these BUG_ONs till prio_tree patch stabilizes */
-       BUG_ON(RADIX_INDEX(vma) != RADIX_INDEX(old));
-       BUG_ON(HEAP_INDEX(vma) != HEAP_INDEX(old));
-
-       if (old->shared.both.parent) {
-               if (old->vm_set_head) {
-                       list_add_tail(&vma->shared.vm_set.list,
-                                       &old->vm_set_head->shared.vm_set.list);
-                       return;
-               }
-               else {
-                       old->vm_set_head = vma;
-                       vma->vm_set_head = old;
-               }
-       }
-       else
-               list_add(&vma->shared.vm_set.list, &old->shared.vm_set.list);
-}
-
-/*
- * We cannot modify vm_start, vm_end, vm_pgoff fields of a vma that has been
- * already present in an i_mmap{_shared} tree without modifying the tree. The
- * following helper function should be used when such modifications are
- * necessary. We should hold the mapping's i_shared_sem.
- *
- * This function can be (micro)optimized for some special cases (maybe later).
- */
-static inline void __vma_modify(struct prio_tree_root *root,
-       struct vm_area_struct *vma, unsigned long start, unsigned long end,
-       unsigned long pgoff)
-{
-       if (root)
-               __vma_prio_tree_remove(root, vma);
-       vma->vm_start = start;
-       vma->vm_end = end;
-       vma->vm_pgoff = pgoff;
-       if (root)
-               __vma_prio_tree_insert(root, vma);
-}
-
-/*
- * Helper functions to enumerate vmas that map a given file page or a set of
- * contiguous file pages. The functions return vmas that at least map a single
- * page in the given range of contiguous file pages.
- */
-static inline struct vm_area_struct *__vma_prio_tree_first(
-       struct prio_tree_root *root, struct prio_tree_iter *iter,
-       unsigned long begin, unsigned long end)
-{
-       struct prio_tree_node *ptr;
-
-       ptr = prio_tree_first(root, iter, begin, end);
-
-       if (ptr)
-               return prio_tree_entry(ptr, struct vm_area_struct,
-                               shared.prio_tree_node);
-       else
-               return NULL;
-}
-
-static inline struct vm_area_struct *__vma_prio_tree_next(
-       struct vm_area_struct *vma, struct prio_tree_root *root,
-       struct prio_tree_iter *iter, unsigned long begin, unsigned long end)
-{
-       struct prio_tree_node *ptr;
-       struct vm_area_struct *next;
-
-       if (vma->shared.both.parent) {
-               if (vma->vm_set_head)
-                       return vma->vm_set_head;
-       }
-       else {
-               next = list_entry(vma->shared.vm_set.list.next,
-                               struct vm_area_struct, shared.vm_set.list);
-               if (!(next->vm_set_head))
-                       return next;
-       }
-
-       ptr = prio_tree_next(root, iter, begin, end);
-
-       if (ptr)
-               return prio_tree_entry(ptr, struct vm_area_struct,
-                               shared.prio_tree_node);
-       else
-               return NULL;
-}
-
-/*
  * mapping from the currently active vm_flags protection bits (the
  * low four bits) to a page protection mask..
  */
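To make the removed indexing scheme concrete, a worked instance with illustrative values:

    /*
     * A vma with vm_pgoff = 4 that spans three pages:
     *
     *   RADIX_INDEX(vma) = vm_pgoff                           = 4
     *   VMA_SIZE(vma)    = (vm_end - vm_start) >> PAGE_SHIFT  = 3
     *   HEAP_INDEX(vma)  = vm_pgoff + (VMA_SIZE(vma) - 1)     = 6
     *
     * i.e. the vma was stored as the closed interval [4, 6] of file
     * pages; the "- 1" keeps HEAP_INDEX from wrapping when vm_pgoff
     * is near ULONG_MAX, per the "avoid overflow" note above.
     */
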
diff --git a/include/linux/prio_tree.h b/include/linux/prio_tree.h
deleted file mode 100644
index 3108fe0..0000000
--- a/include/linux/prio_tree.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef _LINUX_PRIO_TREE_H
-#define _LINUX_PRIO_TREE_H
-
-struct prio_tree_node {
-       struct prio_tree_node *left;
-       struct prio_tree_node *right;
-       struct prio_tree_node *parent;
-};
-
-struct prio_tree_root {
-       struct prio_tree_node   *prio_tree_node;
-       unsigned int            index_bits;
-};
-
-struct prio_tree_iter {
-       struct prio_tree_node   *cur;
-       unsigned long           mask;
-       unsigned long           value;
-       int                     size_level;
-};
-
-#define PRIO_TREE_ROOT (struct prio_tree_root) {NULL, 1}
-
-#define PRIO_TREE_ROOT_INIT    {NULL, 1}
-
-#define INIT_PRIO_TREE_ROOT(ptr)       \
-do {                                   \
-       (ptr)->prio_tree_node = NULL;   \
-       (ptr)->index_bits = 1;          \
-} while (0)
-
-#define PRIO_TREE_NODE_INIT(name)      {&(name), &(name), &(name)}
-
-#define PRIO_TREE_NODE(name) \
-       struct prio_tree_node name = PRIO_TREE_NODE_INIT(name)
-
-#define INIT_PRIO_TREE_NODE(ptr)                               \
-do {                                                           \
-       (ptr)->left = (ptr)->right = (ptr)->parent = (ptr);     \
-} while (0)
-
-#define        prio_tree_entry(ptr, type, member) \
-       ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
-
-#define        PRIO_TREE_ITER  (struct prio_tree_iter) {NULL, 0UL, 0UL, 0}
-
-static inline int prio_tree_empty(const struct prio_tree_root *root)
-{
-       return root->prio_tree_node == NULL;
-}
-
-static inline int prio_tree_root(const struct prio_tree_node *node)
-{
-       return node->parent == node;
-}
-
-static inline int prio_tree_left_empty(const struct prio_tree_node *node)
-{
-       return node->left == node;
-}
-
-static inline int prio_tree_right_empty(const struct prio_tree_node *node)
-{
-       return node->right == node;
-}
-
-extern struct prio_tree_node *prio_tree_insert(struct prio_tree_root *,
-       struct prio_tree_node *);
-
-extern void prio_tree_remove(struct prio_tree_root *, struct prio_tree_node *);
-
-extern struct prio_tree_node *prio_tree_first(struct prio_tree_root *,
-       struct prio_tree_iter *, unsigned long, unsigned long);
-
-extern struct prio_tree_node *prio_tree_next(struct prio_tree_root *,
-       struct prio_tree_iter *, unsigned long, unsigned long);
-
-#endif
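For the record, a minimal sketch of how this removed API was consumed (mirroring the callers deleted elsewhere in this patch; handle(), begin and end are placeholders):

    struct prio_tree_root root = PRIO_TREE_ROOT;
    struct prio_tree_iter iter;
    struct prio_tree_node *node;

    /* insert returns a prior node with identical indices, if one exists */
    node = prio_tree_insert(&root, &vma->shared.prio_tree_node);
    if (node != &vma->shared.prio_tree_node)
            /* duplicate interval: callers chained vma onto the vm_set list */;

    /* stabbing query: visit each stored interval overlapping [begin, end] */
    for (node = prio_tree_first(&root, &iter, begin, end); node;
         node = prio_tree_next(&root, &iter, begin, end))
            handle(prio_tree_entry(node, struct vm_area_struct,
                                   shared.prio_tree_node));
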
index a57749a..7696e2d 100644
--- a/init/main.c
+++ b/init/main.c
@@ -93,7 +93,6 @@ extern void pidhash_init(void);
 extern void pidmap_init(void);
 extern void anon_vma_init(void);
 extern void radix_tree_init(void);
-extern void prio_tree_init(void);
 extern void free_initmem(void);
 extern void populate_rootfs(void);
 extern void driver_init(void);
@@ -497,7 +496,6 @@ asmlinkage void __init start_kernel(void)
        calibrate_delay();
        pidmap_init();
        pgtable_cache_init();
-       prio_tree_init();
        anon_vma_init();
 
 #ifdef CONFIG_KDB
index 579c083..006a4b5 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -330,7 +330,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
                tmp->vm_mm = mm;
                tmp->vm_next = NULL;
                file = tmp->vm_file;
-               INIT_VMA_SHARED(tmp);
+               INIT_LIST_HEAD(&tmp->shared);
                if (file) {
                        struct inode *inode = file->f_dentry->d_inode;
                        get_file(file);
@@ -339,7 +339,7 @@ static inline int dup_mmap(struct mm_struct * mm, struct mm_struct * oldmm)
       
                        /* insert tmp into the share list, just after mpnt */
                        down(&file->f_mapping->i_shared_sem);
-                       __vma_prio_tree_add(tmp, mpnt);
+                       list_add_tail(&tmp->shared, &mpnt->shared);
                        up(&file->f_mapping->i_shared_sem);
                }
 
index c8b77b8..19f4b73 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -9,8 +9,7 @@ mmu-$(CONFIG_MMU)       := fremap.o highmem.o madvise.o memory.o mincore.o \
 
 obj-y                  := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
                           page_alloc.o page-writeback.o pdflush.o readahead.o \
-                          slab.o swap.o truncate.o vmscan.o prio_tree.o \
-                          $(mmu-y)
+                          slab.o swap.o truncate.o vmscan.o $(mmu-y)
 
 obj-$(CONFIG_SWAP)     += page_io.o swap_state.o swapfile.o
 obj-$(CONFIG_PROC_MM)  += proc_mm.o
index f881670..7e94837 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -749,8 +749,7 @@ page_ok:
                 * virtual addresses, take care about potential aliasing
                 * before reading the page on the kernel side.
                 */
-               if (!prio_tree_empty(&mapping->i_mmap_shared) ||
-                       !list_empty(&mapping->i_mmap_nonlinear))
+               if (!list_empty(&mapping->i_mmap_shared))
                        flush_dcache_page(page);
 
                /*
index 391a03a..1bc8368 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -151,8 +151,6 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
        unsigned long __prot, unsigned long pgoff, unsigned long flags)
 {
        struct mm_struct *mm = current->mm;
-       struct address_space *mapping;
-       unsigned long linear_pgoff;
        unsigned long end = start + size;
        struct vm_area_struct *vma;
        int err = -EINVAL;
@@ -189,19 +187,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
                        end > start && start >= vma->vm_start &&
                                end <= vma->vm_end) {
 
-               linear_pgoff = vma->vm_pgoff;
-               linear_pgoff +=  ((start - vma->vm_start) >> PAGE_SHIFT);
                /* Must set VM_NONLINEAR before any pages are populated. */
-               if (pgoff != linear_pgoff && !(vma->vm_flags & VM_NONLINEAR)) {
-                       mapping = vma->vm_file->f_mapping;
-                       down(&mapping->i_shared_sem);
+               if (pgoff != ((start - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff)
                        vma->vm_flags |= VM_NONLINEAR;
-                       __vma_prio_tree_remove(&mapping->i_mmap_shared, vma);
-                       INIT_VMA_SHARED_LIST(vma);
-                       list_add_tail(&vma->shared.vm_set.list,
-                                       &mapping->i_mmap_nonlinear);
-                       up(&mapping->i_shared_sem);
-               }
 
                /* ->populate can take a long time, so downgrade the lock. */
                downgrade_write(&mm->mmap_sem);
index a6d96bd..8fc8cda 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1081,11 +1081,11 @@ no_new_page:
  * An hlen of zero blows away the entire portion file after hba.
  */
 static void
-invalidate_mmap_range_list(struct prio_tree_root *root,
+invalidate_mmap_range_list(struct list_head *head,
                           unsigned long const hba,
                           unsigned long const hlen)
 {
-       struct prio_tree_iter iter;
+       struct list_head *curr;
        unsigned long hea;      /* last page of hole. */
        unsigned long vba;
        unsigned long vea;      /* last page of corresponding uva hole. */
@@ -1096,16 +1096,17 @@ invalidate_mmap_range_list(struct prio_tree_root *root,
        hea = hba + hlen - 1;   /* avoid overflow. */
        if (hea < hba)
                hea = ULONG_MAX;
-       vp = __vma_prio_tree_first(root, &iter, hba, hea);
-       while(vp) {
+       list_for_each(curr, head) {
+               vp = list_entry(curr, struct vm_area_struct, shared);
                vba = vp->vm_pgoff;
                vea = vba + ((vp->vm_end - vp->vm_start) >> PAGE_SHIFT) - 1;
+               if (hea < vba || vea < hba)
+                       continue;       /* Mapping disjoint from hole. */
                zba = (hba <= vba) ? vba : hba;
                zea = (vea <= hea) ? vea : hea;
                zap_page_range(vp,
                               ((zba - vba) << PAGE_SHIFT) + vp->vm_start,
                               (zea - zba + 1) << PAGE_SHIFT);
-               vp = __vma_prio_tree_next(vp, root, &iter, hba, hea);
        }
 }
 
@@ -1140,9 +1141,9 @@ void invalidate_mmap_range(struct address_space *mapping,
        down(&mapping->i_shared_sem);
        /* Protect against page fault */
        atomic_inc(&mapping->truncate_count);
-       if (unlikely(!prio_tree_empty(&mapping->i_mmap)))
+       if (unlikely(!list_empty(&mapping->i_mmap)))
                invalidate_mmap_range_list(&mapping->i_mmap, hba, hlen);
-       if (unlikely(!prio_tree_empty(&mapping->i_mmap_shared)))
+       if (unlikely(!list_empty(&mapping->i_mmap_shared)))
                invalidate_mmap_range_list(&mapping->i_mmap_shared, hba, hlen);
        up(&mapping->i_shared_sem);
 }
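A worked instance of the clipping arithmetic in invalidate_mmap_range_list(), with illustrative numbers:

    /*
     * Hole = file pages [hba, hea] = [10, 19];
     * vma maps file pages [vba, vea] = [15, 30].
     *
     *   hea < vba || vea < hba  ->  19 < 15 || 30 < 10  ->  false, so clip:
     *   zba = max(hba, vba) = 15
     *   zea = min(vea, hea) = 19
     *
     * zap_page_range(vp, vp->vm_start + ((15 - 15) << PAGE_SHIFT),
     *                (19 - 15 + 1) << PAGE_SHIFT);   -- five pages zapped.
     */
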
index 403a7b4..9205870 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -75,20 +75,12 @@ EXPORT_SYMBOL(vm_committed_space);
  * Requires inode->i_mapping->i_shared_sem
  */
 static void
-__remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode,
-                         struct address_space * mapping)
+__remove_shared_vm_struct(struct vm_area_struct *vma, struct inode *inode)
 {
        if (inode) {
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_inc(&inode->i_writecount);
-               if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-                       list_del_init(&vma->shared.vm_set.list);
-                       INIT_VMA_SHARED(vma);
-               }
-               else if (vma->vm_flags & VM_SHARED)
-                       __vma_prio_tree_remove(&mapping->i_mmap_shared, vma);
-               else
-                       __vma_prio_tree_remove(&mapping->i_mmap, vma);
+               list_del_init(&vma->shared);
        }
 }
 
@@ -102,8 +94,7 @@ static void remove_shared_vm_struct(struct vm_area_struct *vma)
        if (file) {
                struct address_space *mapping = file->f_mapping;
                down(&mapping->i_shared_sem);
-               __remove_shared_vm_struct(vma, file->f_dentry->d_inode,
-                               mapping);
+               __remove_shared_vm_struct(vma, file->f_dentry->d_inode);
                up(&mapping->i_shared_sem);
        }
 }
@@ -277,15 +268,10 @@ static inline void __vma_link_file(struct vm_area_struct *vma)
                if (vma->vm_flags & VM_DENYWRITE)
                        atomic_dec(&file->f_dentry->d_inode->i_writecount);
 
-               if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-                       INIT_VMA_SHARED_LIST(vma);
-                       list_add_tail(&vma->shared.vm_set.list,
-                                       &mapping->i_mmap_nonlinear);
-               }
-               else if (vma->vm_flags & VM_SHARED)
-                       __vma_prio_tree_insert(&mapping->i_mmap_shared, vma);
+               if (vma->vm_flags & VM_SHARED)
+                       list_add_tail(&vma->shared, &mapping->i_mmap_shared);
                else
-                       __vma_prio_tree_insert(&mapping->i_mmap, vma);
+                       list_add_tail(&vma->shared, &mapping->i_mmap);
        }
 }
 
@@ -363,6 +349,17 @@ static inline int is_mergeable_vma(struct vm_area_struct *vma,
 }
 
 /*
+ * Requires that the relevant i_shared_sem and anon_vma_lock
+ * be held by the caller.
+ */
+static void move_vma_start(struct vm_area_struct *vma, unsigned long addr)
+{
+       /* we must update pgoff even if there is no vm_file, for the anon_vma */
+       vma->vm_pgoff += (long) (addr - vma->vm_start) >> PAGE_SHIFT;
+       vma->vm_start = addr;
+}
+
+/*
  * Return true if we can merge this (vm_flags,file,vm_pgoff,size)
  * in front of (at a lower virtual address and file offset than) the vma.
  *
@@ -426,9 +423,7 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev,
                        struct mempolicy *policy) 
 {
        struct inode *inode = file ? file->f_dentry->d_inode : NULL;
-       struct address_space *mapping = file ? file->f_mapping : NULL;
        struct semaphore *i_shared_sem;
-       struct prio_tree_root *root = NULL;
 
        /*
         * We later require that vma->vm_flags == vm_flags, so this tests
@@ -439,15 +434,6 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev,
 
        i_shared_sem = file ? &file->f_mapping->i_shared_sem : NULL;
 
-       if (mapping) {
-               if (vm_flags & VM_SHARED) {
-                       if (likely(!(vm_flags & VM_NONLINEAR)))
-                               root = &mapping->i_mmap_shared;
-               }
-               else
-                       root = &mapping->i_mmap;
-       }
-
        if (!prev) {
                prev = rb_entry(rb_parent, struct vm_area_struct, vm_rb);
                goto merge_next;
@@ -462,32 +448,38 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev,
                struct vm_area_struct *next;
 
                /*
+                * This can happen outside the i_shared_sem and outside
+                * the anon_vma_lock, since it only enlarges the size of
+                * the vma; there are no ptes mapped in this newly
+                * extended region anyway.
+                */
+               prev->vm_end = end;
+
+               /*
                 * OK, it did.  Can we now merge in the successor as well?
                 */
                next = prev->vm_next;
                /* next cannot change under us, it's serialized by the mmap_sem */
-               if (next && end == next->vm_start &&
+               if (next && prev->vm_end == next->vm_start &&
                                mpol_equal(prev->vm_policy, next->vm_policy) &&
                                can_vma_merge_before(prev, next, vm_flags, file,
                                        pgoff, (end - addr) >> PAGE_SHIFT)) {
+                       /*
+                        * the vm_end extension on the right can happen as usual
+                        * outside the i_shared_sem/anon_vma_lock.
+                        */
+                       prev->vm_end = next->vm_end;
+
                        /* serialized by the mmap_sem */
                        __vma_unlink(mm, next, prev);
 
                        if (file)
                                down(i_shared_sem);
-                       __vma_modify(root, prev, prev->vm_start,
-                                       next->vm_end, prev->vm_pgoff);
-
-                       __remove_shared_vm_struct(next, inode, mapping);
+                       __remove_shared_vm_struct(next, inode);
                        if (file)
                                up(i_shared_sem);
 
-                       /*
-                        * The anon_vma_lock is taken inside and
-                        * we can race with the vm_end move on the right,
-                        * that will not be a problem, moves on the right
-                        * of vm_end are controlled races.
-                        */
+                       /* the anon_vma_lock is taken inside */
                        anon_vma_merge(prev, next);
 
                        if (file)
@@ -498,19 +490,6 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev,
                        kmem_cache_free(vm_area_cachep, next);
                        return 1;
                }
-
-               /*
-                * this can happen outside the anon_vma_lock since it only
-                * enlarge the size of the vma, there are no ptes mapped in
-                * this new extended region anyways. As usual this is a move
-                * on the right of the vm_end.
-                */
-               if (file)
-                       down(i_shared_sem);
-               __vma_modify(root, prev, prev->vm_start, end, prev->vm_pgoff);
-               if (file)
-                       up(i_shared_sem);
-
                return 1;
        }
 
@@ -529,8 +508,7 @@ static int vma_merge(struct mm_struct *mm, struct vm_area_struct *prev,
                        if (file)
                                down(i_shared_sem);
                        anon_vma_lock(prev);
-                       __vma_modify(root, prev, addr, prev->vm_end,
-                               prev->vm_pgoff - ((end - addr) >> PAGE_SHIFT));
+                       move_vma_start(prev, addr);
                        anon_vma_unlock(prev);
                        if (file)
                                up(i_shared_sem);
@@ -718,7 +696,7 @@ munmap_back:
        vma->vm_private_data = NULL;
        vma->vm_next = NULL;
        mpol_set_vma_default(vma);
-       INIT_VMA_SHARED(vma);
+       INIT_LIST_HEAD(&vma->shared);
        vma->anon_vma = NULL;
 
        if (file) {
@@ -1263,7 +1241,6 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
 {
        struct vm_area_struct *new;
        struct address_space *mapping = NULL;
-       struct prio_tree_root *root = NULL;
 
        if (mm->map_count >= MAX_MAP_COUNT)
                return -ENOMEM;
@@ -1275,7 +1252,7 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
        /* most fields are the same, copy all, and then fixup */
        *new = *vma;
 
-       INIT_VMA_SHARED(new);
+       INIT_LIST_HEAD(&new->shared);
 
        if (new_below)
                new->vm_end = addr;
@@ -1296,27 +1273,18 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
        if (new->vm_ops && new->vm_ops->open)
                new->vm_ops->open(new);
 
-       if (vma->vm_file) {
+       if (vma->vm_file)
                 mapping = vma->vm_file->f_mapping;
 
-                if (vma->vm_flags & VM_SHARED) {
-                        if (likely(!(vma->vm_flags & VM_NONLINEAR)))
-                               root = &mapping->i_mmap_shared;
-                }
-                else
-                        root = &mapping->i_mmap;
-       }
-
        if (mapping)
                down(&mapping->i_shared_sem);
        spin_lock(&mm->page_table_lock);
        anon_vma_lock(vma);
 
        if (new_below)
-               __vma_modify(root, vma, addr, vma->vm_end,
-                       vma->vm_pgoff + ((addr - new->vm_start) >> PAGE_SHIFT));
+               move_vma_start(vma, addr);
        else
-               __vma_modify(root, vma, vma->vm_start, addr, vma->vm_pgoff);
+               vma->vm_end = addr;
 
        __insert_vm_struct(mm, new);
 
@@ -1494,7 +1462,7 @@ unsigned long do_brk(unsigned long addr, unsigned long len)
        vma->vm_file = NULL;
        vma->vm_private_data = NULL;
        mpol_set_vma_default(vma);
-       INIT_VMA_SHARED(vma);
+       INIT_LIST_HEAD(&vma->shared);
        vma->anon_vma = NULL;
 
        vma_link(mm, vma, prev, rb_link, rb_parent);
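One subtlety in the mm/mmap.c changes deserves a worked example: move_vma_start() must keep vm_pgoff in step with vm_start so the file offset backing each remaining page is unchanged. Illustrative values:

    /*
     * before: vm_start = 0x1000, vm_end = 0x5000, vm_pgoff = 0
     *
     * move_vma_start(vma, 0x3000) with 4K pages (PAGE_SHIFT = 12):
     *   vm_pgoff += (0x3000 - 0x1000) >> 12   ->  vm_pgoff = 2
     *   vm_start  = 0x3000
     *
     * The two pages cut off the front are exactly the two file pages
     * no longer mapped; the page at 0x3000 still maps file page 2.
     */
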
index d23e7bd..d5a549a 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -251,7 +251,7 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                        new_vma->vm_policy =mpol_copy(new_vma->vm_policy);
                        if (IS_ERR(new_vma->vm_policy))
                                goto out_vma;
-                       INIT_VMA_SHARED(new_vma);
+                       INIT_LIST_HEAD(&new_vma->shared);
                        new_vma->vm_start = new_addr;
                        new_vma->vm_end = new_addr+new_len;
                        new_vma->vm_pgoff += (addr-vma->vm_start) >> PAGE_SHIFT;
@@ -310,8 +310,6 @@ unsigned long do_mremap(unsigned long addr,
        unsigned long flags, unsigned long new_addr)
 {
        struct vm_area_struct *vma;
-       struct address_space *mapping = NULL;
-       struct prio_tree_root *root = NULL;
        unsigned long ret = -EINVAL;
        unsigned long charged = 0;
 
@@ -419,26 +417,9 @@ unsigned long do_mremap(unsigned long addr,
                /* can we just expand the current mapping? */
                if (max_addr - addr >= new_len) {
                        int pages = (new_len - old_len) >> PAGE_SHIFT;
-
-                       if (vma->vm_file) {
-                               mapping = vma->vm_file->f_mapping;
-                               if (vma->vm_flags & VM_SHARED) {
-                                       if (likely(!(vma->vm_flags & VM_NONLINEAR)))
-                                               root = &mapping->i_mmap_shared;
-                               }
-                               else
-                                       root = &mapping->i_mmap;
-                               down(&mapping->i_shared_sem);
-                       }
-
                        spin_lock(&vma->vm_mm->page_table_lock);
-                       __vma_modify(root, vma, vma->vm_start,
-                                       addr + new_len, vma->vm_pgoff);
+                       vma->vm_end = addr + new_len;
                        spin_unlock(&vma->vm_mm->page_table_lock);
-
-                       if(mapping)
-                               up(&mapping->i_shared_sem);
-
                        current->mm->total_vm += pages;
                        if (vma->vm_flags & VM_LOCKED) {
                                current->mm->locked_vm += pages;
index 507b68b..2fb410c 100644
--- a/mm/objrmap.c
+++ b/mm/objrmap.c
@@ -79,8 +79,8 @@ find_pte(struct vm_area_struct *vma, struct page *page, unsigned long *addr)
 
        loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
        address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
-       if (unlikely(address < vma->vm_start || address >= vma->vm_end))
-               goto out_wrong_vma;
+       if (address < vma->vm_start || address >= vma->vm_end)
+               goto out;
 
        pgd = pgd_offset(mm, address);
        if (!pgd_present(*pgd))
@@ -106,10 +106,6 @@ out_unmap:
        pte_unmap(pte);
 out:
        return NULL;
-
-out_wrong_vma:
-       BUG_ON(!PageAnon(page));
-       goto out;
 }
 
 /**
@@ -133,10 +129,8 @@ page_referenced_one(struct vm_area_struct *vma, struct page *page)
         * Tracking the referenced info is too expensive
         * for nonlinear mappings.
         */
-       if (unlikely(vma->vm_flags & VM_NONLINEAR)) {
-               BUG();
+       if (vma->vm_flags & VM_NONLINEAR)
                goto out;
-       }
 
        if (unlikely(!spin_trylock(&mm->page_table_lock)))
                goto out;
@@ -172,8 +166,6 @@ page_referenced_inode(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct vm_area_struct *vma;
-       struct prio_tree_iter iter;
-       unsigned long loffset;
        int referenced = 0;
 
        BUG_ON(PageSwapCache(page));
@@ -181,22 +173,11 @@ page_referenced_inode(struct page *page)
        if (unlikely(down_trylock(&mapping->i_shared_sem)))
                goto out;
 
-       loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
-
-       vma = __vma_prio_tree_first(&mapping->i_mmap, &iter, loffset, loffset);
-       while (vma) {
+       list_for_each_entry(vma, &mapping->i_mmap, shared)
                referenced += page_referenced_one(vma, page);
-               vma = __vma_prio_tree_next(vma, &mapping->i_mmap, &iter,
-                               loffset, loffset);
-       }
 
-       vma = __vma_prio_tree_first(&mapping->i_mmap_shared, &iter, loffset,
-                       loffset);
-       while (vma) {
+       list_for_each_entry(vma, &mapping->i_mmap_shared, shared)
                referenced += page_referenced_one(vma, page);
-               vma = __vma_prio_tree_next(vma, &mapping->i_mmap_shared, &iter,
-                               loffset, loffset);
-       }
 
        up(&mapping->i_shared_sem);
  out:
@@ -607,8 +588,6 @@ try_to_unmap_inode(struct page *page)
 {
        struct address_space *mapping = page->mapping;
        struct vm_area_struct *vma;
-       struct prio_tree_iter iter;
-       unsigned long loffset;
        int ret = SWAP_AGAIN, young = 0;
 
        BUG_ON(PageSwapCache(page));
@@ -616,28 +595,13 @@ try_to_unmap_inode(struct page *page)
        if (unlikely(down_trylock(&mapping->i_shared_sem)))
                return ret;
        
-       loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
-
-       vma = __vma_prio_tree_first(&mapping->i_mmap, &iter, loffset, loffset);
-       while (vma) {
-               ret = try_to_unmap_one(vma, page, &young);
-               if (ret == SWAP_FAIL || !page->mapcount)
-                       goto out;
-               vma = __vma_prio_tree_next(vma, &mapping->i_mmap, &iter,
-                               loffset, loffset);
-       }
-
-       vma = __vma_prio_tree_first(&mapping->i_mmap_shared, &iter, loffset,
-                       loffset);
-       while (vma) {
+       list_for_each_entry(vma, &mapping->i_mmap, shared) {
                ret = try_to_unmap_one(vma, page, &young);
                if (ret == SWAP_FAIL || !page->mapcount)
                        goto out;
-               vma = __vma_prio_tree_next(vma, &mapping->i_mmap_shared, &iter,
-                               loffset, loffset);
        }
 
-       list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) {
+       list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
                ret = try_to_unmap_one(vma, page, &young);
                if (ret == SWAP_FAIL || !page->mapcount)
                        goto out;
diff --git a/mm/prio_tree.c b/mm/prio_tree.c
deleted file mode 100644
index cdddde4..0000000
--- a/mm/prio_tree.c
+++ /dev/null
@@ -1,574 +0,0 @@
-/*
- * mm/prio_tree.c - priority search tree for mapping->i_mmap{,_shared}
- *
- * Copyright (C) 2004, Rajesh Venkatasubramanian <vrajesh@umich.edu>
- *
- * Based on the radix priority search tree proposed by Edward M. McCreight
- * SIAM Journal of Computing, vol. 14, no.2, pages 257-276, May 1985
- *
- * 02Feb2004   Initial version
- */
-
-#include <linux/init.h>
-#include <linux/mm.h>
-#include <linux/prio_tree.h>
-
-/*
- * A clever mix of heap and radix trees forms a radix priority search tree (PST)
- * which is useful for storing intervals, e.g, we can consider a vma as a closed
- * interval of file pages [offset_begin, offset_end], and store all vmas that
- * map a file in a PST. Then, using the PST, we can answer a stabbing query,
- * i.e., selecting a set of stored intervals (vmas) that overlap with (map) a
- * given input interval X (a set of consecutive file pages), in "O(log n + m)"
- * time where 'log n' is the height of the PST, and 'm' is the number of stored
- * intervals (vmas) that overlap (map) with the input interval X (the set of
- * consecutive file pages).
- *
- * In our implementation, we store closed intervals of the form [radix_index,
- * heap_index]. We assume that always radix_index <= heap_index. McCreight's PST
- * is designed for storing intervals with unique radix indices, i.e., each
- * interval have different radix_index. However, this limitation can be easily
- * overcome by using the size, i.e., heap_index - radix_index, as part of the
- * index, so we index the tree using [(radix_index,size), heap_index].
- *
- * When the above-mentioned indexing scheme is used, theoretically, in a 32 bit
- * machine, the maximum height of a PST can be 64. We can use a balanced version
- * of the priority search tree to optimize the tree height, but the balanced
- * tree proposed by McCreight is too complex and memory-hungry for our purpose.
- */
-
-static unsigned long index_bits_to_maxindex[BITS_PER_LONG];
-
-/*
- * Maximum heap_index that can be stored in a PST with index_bits bits
- */
-static inline unsigned long prio_tree_maxindex(unsigned int bits)
-{
-       return index_bits_to_maxindex[bits - 1];
-}
-
-/*
- * Extend a priority search tree so that it can store a node with heap_index
- * max_heap_index. In the worst case, this algorithm takes O((log n)^2).
- * However, this function is used rarely and the common case performance is
- * not bad.
- */
-static struct prio_tree_node *prio_tree_expand(struct prio_tree_root *root,
-       struct prio_tree_node *node, unsigned long max_heap_index)
-{
-       struct prio_tree_node *first = NULL, *prev, *last = NULL;
-
-       if (max_heap_index > prio_tree_maxindex(root->index_bits))
-               root->index_bits++;
-
-       while (max_heap_index > prio_tree_maxindex(root->index_bits)) {
-               root->index_bits++;
-
-               if (prio_tree_empty(root))
-                       continue;
-
-               if (first == NULL) {
-                       first = root->prio_tree_node;
-                       prio_tree_remove(root, root->prio_tree_node);
-                       INIT_PRIO_TREE_NODE(first);
-                       last = first;
-               }
-               else {
-                       prev = last;
-                       last = root->prio_tree_node;
-                       prio_tree_remove(root, root->prio_tree_node);
-                       INIT_PRIO_TREE_NODE(last);
-                       prev->left = last;
-                       last->parent = prev;
-               }
-       }
-
-       INIT_PRIO_TREE_NODE(node);
-
-       if (first) {
-               node->left = first;
-               first->parent = node;
-       }
-       else
-               last = node;
-
-       if (!prio_tree_empty(root)) {
-               last->left = root->prio_tree_node;
-               last->left->parent = last;
-       }
-
-       root->prio_tree_node = node;
-       return node;
-}
-
-/*
- * Replace a prio_tree_node with a new node and return the old node
- */
-static inline struct prio_tree_node *prio_tree_replace(
-       struct prio_tree_root *root, struct prio_tree_node *old,
-       struct prio_tree_node *node)
-{
-       INIT_PRIO_TREE_NODE(node);
-
-       if (prio_tree_root(old)) {
-               BUG_ON(root->prio_tree_node != old);
-               /*
-                * We can reduce root->index_bits here. However, it is complex
-                * and does not help much to improve performance (IMO).
-                */
-               node->parent = node;
-               root->prio_tree_node = node;
-       }
-       else {
-               node->parent = old->parent;
-               if (old->parent->left == old)
-                       old->parent->left = node;
-               else {
-                       BUG_ON(old->parent->right != old);
-                       old->parent->right = node;
-               }
-       }
-
-       if (!prio_tree_left_empty(old)) {
-               node->left = old->left;
-               old->left->parent = node;
-       }
-
-       if (!prio_tree_right_empty(old)) {
-               node->right = old->right;
-               old->right->parent = node;
-       }
-
-       return old;
-}
-
-#undef swap
-#define        swap(x,y,z)     do {z = x; x = y; y = z; } while (0)
-
-/*
- * Insert a prio_tree_node @node into a radix priority search tree @root. The
- * algorithm typically takes O(log n) time where 'log n' is the number of bits
- * required to represent the maximum heap_index. In the worst case, the algo
- * can take O((log n)^2) - check prio_tree_expand.
- *
- * If a prior node with same radix_index and heap_index is already found in
- * the tree, then returns the address of the prior node. Otherwise, inserts
- * @node into the tree and returns @node.
- */
-
-struct prio_tree_node *prio_tree_insert(struct prio_tree_root *root,
-                       struct prio_tree_node *node)
-{
-       struct prio_tree_node *cur, *res = node;
-       unsigned long radix_index, heap_index;
-       unsigned long r_index, h_index, index, mask;
-       int size_flag = 0;
-
-       GET_INDEX(node, radix_index, heap_index);
-
-       if (prio_tree_empty(root) ||
-                       heap_index > prio_tree_maxindex(root->index_bits))
-               return prio_tree_expand(root, node, heap_index);
-
-       cur = root->prio_tree_node;
-       mask = 1UL << (root->index_bits - 1);
-
-       while (mask) {
-               GET_INDEX(cur, r_index, h_index);
-
-               if (r_index == radix_index && h_index == heap_index)
-                       return cur;
-
-                if (h_index < heap_index || (h_index == heap_index &&
-                                               r_index > radix_index))
-               {
-                       struct prio_tree_node *tmp = node;
-                       node = prio_tree_replace(root, cur, node);
-                       cur = tmp;
-                       swap(r_index, radix_index, index);
-                       swap(h_index, heap_index, index);
-               }
-
-               if (size_flag)
-                       index = heap_index - radix_index;
-               else
-                       index = radix_index;
-
-               if (index & mask) {
-                       if (prio_tree_right_empty(cur)) {
-                               INIT_PRIO_TREE_NODE(node);
-                               cur->right = node;
-                               node->parent = cur;
-                               return res;
-                       }
-                       else
-                               cur = cur->right;
-               }
-               else {
-                       if (prio_tree_left_empty(cur)) {
-                               INIT_PRIO_TREE_NODE(node);
-                               cur->left = node;
-                               node->parent = cur;
-                               return res;
-                       }
-                       else
-                               cur = cur->left;
-               }
-
-               mask >>= 1;
-
-               if (!mask) {
-                       mask = 1UL << (root->index_bits - 1);
-                       size_flag = 1;
-               }
-       }
-       /* Should not reach here */
-       BUG();
-       return NULL;
-}
-
-/*
- * Remove a prio_tree_node @node from a radix priority search tree @root. The
- * algorithm takes O(log n) time where 'log n' is the number of bits required
- * to represent the maximum heap_index.
- */
-
-void prio_tree_remove(struct prio_tree_root *root, struct prio_tree_node *node)
-{
-       struct prio_tree_node *cur;
-       unsigned long r_index, h_index_right, h_index_left;
-
-       cur = node;
-
-       while (!prio_tree_left_empty(cur) || !prio_tree_right_empty(cur)) {
-               if (!prio_tree_left_empty(cur))
-                       GET_INDEX(cur->left, r_index, h_index_left);
-               else {
-                       cur = cur->right;
-                       continue;
-               }
-
-               if (!prio_tree_right_empty(cur))
-                       GET_INDEX(cur->right, r_index, h_index_right);
-               else {
-                       cur = cur->left;
-                       continue;
-               }
-
-               /* both h_index_left and h_index_right cannot be 0 */
-               if (h_index_left >= h_index_right)
-                       cur = cur->left;
-               else
-                       cur = cur->right;
-       }
-
-       if (prio_tree_root(cur)) {
-               BUG_ON(root->prio_tree_node != cur);
-               *root = PRIO_TREE_ROOT;
-               return;
-       }
-
-       if (cur->parent->right == cur)
-               cur->parent->right = cur->parent;
-       else {
-               BUG_ON(cur->parent->left != cur);
-               cur->parent->left = cur->parent;
-       }
-
-       while (cur != node)
-               cur = prio_tree_replace(root, cur->parent, cur);
-
-       return;
-}
-
-/*
- * Following functions help to enumerate all prio_tree_nodes in the tree that
- * overlap with the input interval X [radix_index, heap_index]. The enumeration
- * takes O(log n + m) time where 'log n' is the height of the tree (which is
- * proportional to # of bits required to represent the maximum heap_index) and
- * 'm' is the number of prio_tree_nodes that overlap the interval X.
- */
-
-static inline struct prio_tree_node *__prio_tree_left(
-       struct prio_tree_root *root, struct prio_tree_iter *iter,
-       unsigned long radix_index, unsigned long heap_index,
-       unsigned long *r_index, unsigned long *h_index)
-{
-       if (prio_tree_left_empty(iter->cur))
-               return NULL;
-
-       GET_INDEX(iter->cur->left, *r_index, *h_index);
-
-       if (radix_index <= *h_index) {
-               iter->cur = iter->cur->left;
-               iter->mask >>= 1;
-               if (iter->mask) {
-                       if (iter->size_level)
-                               iter->size_level++;
-               }
-               else {
-                       iter->size_level = 1;
-                       iter->mask = 1UL << (root->index_bits - 1);
-               }
-               return iter->cur;
-       }
-
-       return NULL;
-}
-
-
-static inline struct prio_tree_node *__prio_tree_right(
-       struct prio_tree_root *root, struct prio_tree_iter *iter,
-       unsigned long radix_index, unsigned long heap_index,
-       unsigned long *r_index, unsigned long *h_index)
-{
-       unsigned long value;
-
-       if (prio_tree_right_empty(iter->cur))
-               return NULL;
-
-       if (iter->size_level)
-               value = iter->value;
-       else
-               value = iter->value | iter->mask;
-
-       if (heap_index < value)
-               return NULL;
-
-       GET_INDEX(iter->cur->right, *r_index, *h_index);
-
-       if (radix_index <= *h_index) {
-               iter->cur = iter->cur->right;
-               iter->mask >>= 1;
-               iter->value = value;
-               if (iter->mask) {
-                       if (iter->size_level)
-                               iter->size_level++;
-               }
-               else {
-                       iter->size_level = 1;
-                       iter->mask = 1UL << (root->index_bits - 1);
-               }
-               return iter->cur;
-       }
-
-       return NULL;
-}
-
-static inline struct prio_tree_node *__prio_tree_parent(
-       struct prio_tree_iter *iter)
-{
-       iter->cur = iter->cur->parent;
-       iter->mask <<= 1;
-       if (iter->size_level) {
-               if (iter->size_level == 1)
-                       iter->mask = 1UL;
-               iter->size_level--;
-       }
-       else if (iter->value & iter->mask)
-               iter->value ^= iter->mask;
-       return iter->cur;
-}
-
-static inline int overlap(unsigned long radix_index, unsigned long heap_index,
-       unsigned long r_index, unsigned long h_index)
-{
-       if (heap_index < r_index || radix_index > h_index)
-               return 0;
-
-       return 1;
-}
-
-/*
- * prio_tree_first:
- *
- * Get the first prio_tree_node that overlaps with the interval [radix_index,
- * heap_index]. Note that always radix_index <= heap_index. We do a pre-order
- * traversal of the tree.
- */
-struct prio_tree_node *prio_tree_first(struct prio_tree_root *root,
-       struct prio_tree_iter *iter, unsigned long radix_index,
-       unsigned long heap_index)
-{
-       unsigned long r_index, h_index;
-
-       *iter = PRIO_TREE_ITER;
-
-       if (prio_tree_empty(root))
-               return NULL;
-
-       GET_INDEX(root->prio_tree_node, r_index, h_index);
-
-       if (radix_index > h_index)
-               return NULL;
-
-       iter->mask = 1UL << (root->index_bits - 1);
-       iter->cur = root->prio_tree_node;
-
-       while (1) {
-               if (overlap(radix_index, heap_index, r_index, h_index))
-                       return iter->cur;
-
-               if (__prio_tree_left(root, iter, radix_index, heap_index,
-                                       &r_index, &h_index))
-                       continue;
-
-               if (__prio_tree_right(root, iter, radix_index, heap_index,
-                                       &r_index, &h_index))
-                       continue;
-
-               break;
-       }
-       return NULL;
-}
-
-/* Get the next prio_tree_node that overlaps with the input interval in iter */
-struct prio_tree_node *prio_tree_next(struct prio_tree_root *root,
-       struct prio_tree_iter *iter, unsigned long radix_index,
-       unsigned long heap_index)
-{
-       unsigned long r_index, h_index;
-
-repeat:
-       while (__prio_tree_left(root, iter, radix_index, heap_index,
-                               &r_index, &h_index))
-               if (overlap(radix_index, heap_index, r_index, h_index))
-                       return iter->cur;
-
-       while (!__prio_tree_right(root, iter, radix_index, heap_index,
-                               &r_index, &h_index)) {
-               while (!prio_tree_root(iter->cur) &&
-                               iter->cur->parent->right == iter->cur)
-                       __prio_tree_parent(iter);
-
-               if (prio_tree_root(iter->cur))
-                       return NULL;
-
-               __prio_tree_parent(iter);
-       }
-
-       if (overlap(radix_index, heap_index, r_index, h_index))
-                       return iter->cur;
-
-       goto repeat;
-}
-
-/*
- * Radix priority search tree for address_space->i_mmap_{_shared}
- *
- * For each vma that map a unique set of file pages i.e., unique [radix_index,
- * heap_index] value, we have a corresponing priority search tree node. If
- * multiple vmas have identical [radix_index, heap_index] value, then one of
- * them is used as a tree node and others are stored in a vm_set list. The tree
- * node points to the first vma (head) of the list using vm_set_head.
- *
- * prio_tree_root
- *      |
- *      A       vm_set_head
- *     / \      /
- *    L   R -> H-I-J-K-M-N-O-P-Q-S
- *    ^   ^    <-- vm_set.list -->
- *  tree nodes
- *
- * We need some way to identify whether a vma is a tree node, head of a vm_set
- * list, or just a member of a vm_set list. We cannot use vm_flags to store
- * such information. The reason is, in the above figure, it is possible that
- * vm_flags' of R and H are covered by the different mmap_sems. When R is
- * removed under R->mmap_sem, H replaces R as a tree node. Since we do not hold
- * H->mmap_sem, we cannot use H->vm_flags for marking that H is a tree node now.
- * That's why some trick involving shared.both.parent is used for identifying
- * tree nodes and list head nodes. We can possibly use the least significant
- * bit of the vm_set_head field to mark tree and list head nodes. I was worried
- * about the alignment of vm_area_struct in various architectures.
- *
- * vma radix priority search tree node rules:
- *
- * vma->shared.both.parent != NULL     ==>     a tree node
- *
- * vma->shared.both.parent == NULL
- *     vma->vm_set_head != NULL  ==>  list head of vmas that map same pages
- *     vma->vm_set_head == NULL  ==>  a list node
- */
-
-void __vma_prio_tree_insert(struct prio_tree_root *root,
-       struct vm_area_struct *vma)
-{
-       struct prio_tree_node *ptr;
-       struct vm_area_struct *old;
-
-       ptr = prio_tree_insert(root, &vma->shared.prio_tree_node);
-
-       if (ptr == &vma->shared.prio_tree_node) {
-               vma->vm_set_head = NULL;
-               return;
-       }
-
-       old = prio_tree_entry(ptr, struct vm_area_struct,
-                       shared.prio_tree_node);
-
-       __vma_prio_tree_add(vma, old);
-}
-
-void __vma_prio_tree_remove(struct prio_tree_root *root,
-       struct vm_area_struct *vma)
-{
-       struct vm_area_struct *node, *head, *new_head;
-
-       if (vma->shared.both.parent == NULL && vma->vm_set_head == NULL) {
-               list_del_init(&vma->shared.vm_set.list);
-               INIT_VMA_SHARED(vma);
-               return;
-       }
-
-       if (vma->vm_set_head) {
-               /* Leave this BUG_ON till prio_tree patch stabilizes */
-               BUG_ON(vma->vm_set_head->vm_set_head != vma);
-               if (vma->shared.both.parent) {
-                       head = vma->vm_set_head;
-                       if (!list_empty(&head->shared.vm_set.list)) {
-                               new_head = list_entry(
-                                       head->shared.vm_set.list.next,
-                                       struct vm_area_struct,
-                                       shared.vm_set.list);
-                               list_del_init(&head->shared.vm_set.list);
-                       }
-                       else
-                               new_head = NULL;
-
-                       prio_tree_replace(root, &vma->shared.prio_tree_node,
-                                       &head->shared.prio_tree_node);
-                       head->vm_set_head = new_head;
-                       if (new_head)
-                               new_head->vm_set_head = head;
-
-               }
-               else {
-                       node = vma->vm_set_head;
-                       if (!list_empty(&vma->shared.vm_set.list)) {
-                               new_head = list_entry(
-                                       vma->shared.vm_set.list.next,
-                                       struct vm_area_struct,
-                                       shared.vm_set.list);
-                               list_del_init(&vma->shared.vm_set.list);
-                               node->vm_set_head = new_head;
-                               new_head->vm_set_head = node;
-                       }
-                       else
-                               node->vm_set_head = NULL;
-               }
-               INIT_VMA_SHARED(vma);
-               return;
-       }
-
-       prio_tree_remove(root, &vma->shared.prio_tree_node);
-       INIT_VMA_SHARED(vma);
-}
-
-void __init prio_tree_init(void)
-{
-       unsigned int i;
-
-       for (i = 0; i < ARRAY_SIZE(index_bits_to_maxindex) - 1; i++)
-               index_bits_to_maxindex[i] = (1UL << (i + 1)) - 1;
-       index_bits_to_maxindex[ARRAY_SIZE(index_bits_to_maxindex) - 1] = ~0UL;
-}
index 87db56f..6e625fd 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1413,8 +1413,7 @@ static void do_shmem_file_read(struct file *filp, loff_t *ppos, read_descriptor_
                         * virtual addresses, take care about potential aliasing
                         * before reading the page on the kernel side.
                         */
-                       if (!prio_tree_empty(&mapping->i_mmap_shared) ||
-                               !list_empty(&mapping->i_mmap_nonlinear))
+                       if (!list_empty(&mapping->i_mmap_shared))
                                flush_dcache_page(page);
                        /*
                         * Mark the page accessed if we read the beginning.
index f33585f..f7f0647 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -29,9 +29,8 @@ struct address_space swapper_space = {
        .tree_lock      = SPIN_LOCK_UNLOCKED,
        .a_ops          = &swap_aops,
        .backing_dev_info = &swap_backing_dev_info,
-       .i_mmap         = PRIO_TREE_ROOT_INIT,
-       .i_mmap_shared  = PRIO_TREE_ROOT_INIT,
-       .i_mmap_nonlinear = LIST_HEAD_INIT(swapper_space.i_mmap_nonlinear),
+       .i_mmap         = LIST_HEAD_INIT(swapper_space.i_mmap),
+       .i_mmap_shared  = LIST_HEAD_INIT(swapper_space.i_mmap_shared),
        .i_shared_sem   = __MUTEX_INITIALIZER(swapper_space.i_shared_sem),
        .truncate_count  = ATOMIC_INIT(0),
        .private_lock   = SPIN_LOCK_UNLOCKED,
index e39f33f..b9d2d18 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -191,11 +191,9 @@ static inline int page_mapping_inuse(struct page *page)
                return 1;
 
        /* File is mmap'd by somebody. */
-       if (!prio_tree_empty(&mapping->i_mmap))
+       if (!list_empty(&mapping->i_mmap))
                return 1;
-       if (!prio_tree_empty(&mapping->i_mmap_shared))
-               return 1;
-       if (!list_empty(&mapping->i_mmap_nonlinear))
+       if (!list_empty(&mapping->i_mmap_shared))
                return 1;
 
        return 0;