diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c
index 6bdc67d..1ccbd71 100644
--- a/mm/page_cgroup.c
+++ b/mm/page_cgroup.c
 #include <linux/swapops.h>
 #include <linux/kmemleak.h>
 
-static void __meminit init_page_cgroup(struct page_cgroup *pc, unsigned long id)
-{
-       pc->flags = 0;
-       set_page_cgroup_array_id(pc, id);
-       pc->mem_cgroup = NULL;
-       INIT_LIST_HEAD(&pc->lru);
-}
 static unsigned long total_usage;
 
 #if !defined(CONFIG_SPARSEMEM)
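The removed init_page_cgroup() is no longer needed because the page_cgroup
arrays are now handed out zero-filled by their allocators (the bootmem
allocator in alloc_node_page_cgroup(), and __GFP_ZERO/vzalloc in
alloc_page_cgroup() further down). A minimal sketch of the assumption this
relies on, namely that struct page_cgroup is left with only members whose
initial state is all-zero (the array-id and lru members are no longer
initialized here):

	/* Assumed layout, for illustration only: */
	struct page_cgroup {
		unsigned long flags;		/* 0 == no flags set   */
		struct mem_cgroup *mem_cgroup;	/* NULL == not charged */
	};

With such a layout, a zeroed table is equivalent to running the removed
per-entry initialization loops.
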
@@ -35,35 +28,27 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
        struct page_cgroup *base;
 
        base = NODE_DATA(page_to_nid(page))->node_page_cgroup;
+#ifdef CONFIG_DEBUG_VM
+       /*
+        * The sanity checks the page allocator does upon freeing a
+        * page can reach here before the page_cgroup arrays are
+        * allocated when feeding a range of pages to the allocator
+        * for the first time during bootup or memory hotplug.
+        */
        if (unlikely(!base))
                return NULL;
-
+#endif
        offset = pfn - NODE_DATA(page_to_nid(page))->node_start_pfn;
        return base + offset;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-       unsigned long pfn;
-       struct page *page;
-       pg_data_t *pgdat;
-
-       pgdat = NODE_DATA(page_cgroup_array_id(pc));
-       pfn = pc - pgdat->node_page_cgroup + pgdat->node_start_pfn;
-       page = pfn_to_page(pfn);
-       VM_BUG_ON(pc != lookup_page_cgroup(page));
-       return page;
-}
-
 static int __init alloc_node_page_cgroup(int nid)
 {
-       struct page_cgroup *base, *pc;
+       struct page_cgroup *base;
        unsigned long table_size;
-       unsigned long start_pfn, nr_pages, index;
+       unsigned long nr_pages;
 
-       start_pfn = NODE_DATA(nid)->node_start_pfn;
        nr_pages = NODE_DATA(nid)->node_spanned_pages;
-
        if (!nr_pages)
                return 0;
 
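For illustration, the flat-memory lookup above is just an index into the
per-node array; the pfn values below are made up:

	/* Hypothetical node spanning pfns 0x10000..0x1ffff: */
	unsigned long pfn    = 0x10234;			/* page_to_pfn(page)        */
	unsigned long offset = pfn - 0x10000;		/* node_start_pfn bias      */
	struct page_cgroup *pc = base + offset;		/* entry 0x234 of the table */
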
@@ -73,10 +58,6 @@ static int __init alloc_node_page_cgroup(int nid)
                        table_size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
        if (!base)
                return -ENOMEM;
-       for (index = 0; index < nr_pages; index++) {
-               pc = base + index;
-               init_page_cgroup(pc, nid);
-       }
        NODE_DATA(nid)->node_page_cgroup = base;
        total_usage += table_size;
        return 0;
@@ -111,67 +92,45 @@ struct page_cgroup *lookup_page_cgroup(struct page *page)
 {
        unsigned long pfn = page_to_pfn(page);
        struct mem_section *section = __pfn_to_section(pfn);
-
+#ifdef CONFIG_DEBUG_VM
+       /*
+        * The sanity checks the page allocator does upon freeing a
+        * page can reach here before the page_cgroup arrays are
+        * allocated when feeding a range of pages to the allocator
+        * for the first time during bootup or memory hotplug.
+        */
        if (!section->page_cgroup)
                return NULL;
+#endif
        return section->page_cgroup + pfn;
 }
 
-struct page *lookup_cgroup_page(struct page_cgroup *pc)
-{
-       struct mem_section *section;
-       struct page *page;
-       unsigned long nr;
-
-       nr = page_cgroup_array_id(pc);
-       section = __nr_to_section(nr);
-       page = pfn_to_page(pc - section->page_cgroup);
-       VM_BUG_ON(pc != lookup_page_cgroup(page));
-       return page;
-}
-
 static void *__meminit alloc_page_cgroup(size_t size, int nid)
 {
+       gfp_t flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN;
        void *addr = NULL;
 
-       addr = alloc_pages_exact_nid(nid, size, GFP_KERNEL | __GFP_NOWARN);
-       if (addr)
+       addr = alloc_pages_exact_nid(nid, size, flags);
+       if (addr) {
+               kmemleak_alloc(addr, size, 1, flags);
                return addr;
+       }
 
        if (node_state(nid, N_HIGH_MEMORY))
-               addr = vmalloc_node(size, nid);
+               addr = vzalloc_node(size, nid);
        else
-               addr = vmalloc(size);
+               addr = vzalloc(size);
 
        return addr;
 }
 
-#ifdef CONFIG_MEMORY_HOTPLUG
-static void free_page_cgroup(void *addr)
-{
-       if (is_vmalloc_addr(addr)) {
-               vfree(addr);
-       } else {
-               struct page *page = virt_to_page(addr);
-               size_t table_size =
-                       sizeof(struct page_cgroup) * PAGES_PER_SECTION;
-
-               BUG_ON(PageReserved(page));
-               free_pages_exact(addr, table_size);
-       }
-}
-#endif
-
 static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
 {
-       struct page_cgroup *base, *pc;
        struct mem_section *section;
+       struct page_cgroup *base;
        unsigned long table_size;
-       unsigned long nr;
-       int index;
 
-       nr = pfn_to_section_nr(pfn);
-       section = __nr_to_section(nr);
+       section = __pfn_to_section(pfn);
 
        if (section->page_cgroup)
                return 0;
@@ -191,10 +150,6 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
                return -ENOMEM;
        }
 
-       for (index = 0; index < PAGES_PER_SECTION; index++) {
-               pc = base + index;
-               init_page_cgroup(pc, nr);
-       }
        /*
         * The passed "pfn" may not be aligned to SECTION.  For the calculation
         * we need to apply a mask.
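A sketch of the pointer bias that comment refers to: the stored
section->page_cgroup is offset by the section-aligned start pfn, so the
sparsemem lookup_page_cgroup() above can index it with a raw pfn. This is
an illustration of the scheme, not the elided lines themselves:

	unsigned long start = pfn & PAGE_SECTION_MASK;	/* first pfn of the section */
	section->page_cgroup = base - start;		/* biased table base        */
	/* lookup side: section->page_cgroup + pfn == base + (pfn - start) */
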
@@ -205,6 +160,20 @@ static int __meminit init_section_page_cgroup(unsigned long pfn, int nid)
        return 0;
 }
 #ifdef CONFIG_MEMORY_HOTPLUG
+static void free_page_cgroup(void *addr)
+{
+       if (is_vmalloc_addr(addr)) {
+               vfree(addr);
+       } else {
+               struct page *page = virt_to_page(addr);
+               size_t table_size =
+                       sizeof(struct page_cgroup) * PAGES_PER_SECTION;
+
+               BUG_ON(PageReserved(page));
+               free_pages_exact(addr, table_size);
+       }
+}
+
 void __free_page_cgroup(unsigned long pfn)
 {
        struct mem_section *ms;
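For a sense of the allocation size being freed here (assuming x86-64
defaults of 128 MiB sections, 4 KiB pages and a 16-byte struct page_cgroup;
other configurations will differ):

	/*
	 * PAGES_PER_SECTION = (128 MiB / 4 KiB) = 32768
	 * table_size        = 32768 * 16 bytes  = 512 KiB per memory section
	 *
	 * A request that large can fail as a contiguous allocation after
	 * boot, which is why alloc_page_cgroup() falls back to vzalloc()
	 * and this free path has to distinguish vmalloc memory from
	 * page-allocator memory.
	 */
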
@@ -357,13 +326,12 @@ struct swap_cgroup_ctrl {
        spinlock_t      lock;
 };
 
-struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
+static struct swap_cgroup_ctrl swap_cgroup_ctrl[MAX_SWAPFILES];
 
 struct swap_cgroup {
        unsigned short          id;
 };
 #define SC_PER_PAGE    (PAGE_SIZE/sizeof(struct swap_cgroup))
-#define SC_POS_MASK    (SC_PER_PAGE - 1)
 
 /*
  * SwapCgroup implements "lookup" and "exchange" operations.
@@ -405,6 +373,23 @@ not_enough_page:
        return -ENOMEM;
 }
 
+static struct swap_cgroup *lookup_swap_cgroup(swp_entry_t ent,
+                                       struct swap_cgroup_ctrl **ctrlp)
+{
+       pgoff_t offset = swp_offset(ent);
+       struct swap_cgroup_ctrl *ctrl;
+       struct page *mappage;
+       struct swap_cgroup *sc;
+
+       ctrl = &swap_cgroup_ctrl[swp_type(ent)];
+       if (ctrlp)
+               *ctrlp = ctrl;
+
+       mappage = ctrl->map[offset / SC_PER_PAGE];
+       sc = page_address(mappage);
+       return sc + offset % SC_PER_PAGE;
+}
+
 /**
  * swap_cgroup_cmpxchg - cmpxchg mem_cgroup's id for this swp_entry.
  * @end: swap entry to be cmpxchged
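The new helper centralizes the map-page/index arithmetic that
swap_cgroup_cmpxchg(), swap_cgroup_record() and the old lookup_swap_cgroup()
each open-coded. A worked example, assuming 4 KiB pages and the two-byte
struct swap_cgroup above (so SC_PER_PAGE == 2048):

	/* For a swap entry with swp_offset(ent) == 5000 (made-up number): */
	mappage = ctrl->map[5000 / 2048];	/* map page 2                */
	sc = page_address(mappage);
	sc += 5000 % 2048;			/* entry 904 within that page */
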
@@ -417,21 +402,13 @@ not_enough_page:
 unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
                                        unsigned short old, unsigned short new)
 {
-       int type = swp_type(ent);
-       unsigned long offset = swp_offset(ent);
-       unsigned long idx = offset / SC_PER_PAGE;
-       unsigned long pos = offset & SC_POS_MASK;
        struct swap_cgroup_ctrl *ctrl;
-       struct page *mappage;
        struct swap_cgroup *sc;
        unsigned long flags;
        unsigned short retval;
 
-       ctrl = &swap_cgroup_ctrl[type];
+       sc = lookup_swap_cgroup(ent, &ctrl);
 
-       mappage = ctrl->map[idx];
-       sc = page_address(mappage);
-       sc += pos;
        spin_lock_irqsave(&ctrl->lock, flags);
        retval = sc->id;
        if (retval == old)
@@ -452,21 +429,13 @@ unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,
  */
 unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 {
-       int type = swp_type(ent);
-       unsigned long offset = swp_offset(ent);
-       unsigned long idx = offset / SC_PER_PAGE;
-       unsigned long pos = offset & SC_POS_MASK;
        struct swap_cgroup_ctrl *ctrl;
-       struct page *mappage;
        struct swap_cgroup *sc;
        unsigned short old;
        unsigned long flags;
 
-       ctrl = &swap_cgroup_ctrl[type];
+       sc = lookup_swap_cgroup(ent, &ctrl);
 
-       mappage = ctrl->map[idx];
-       sc = page_address(mappage);
-       sc += pos;
        spin_lock_irqsave(&ctrl->lock, flags);
        old = sc->id;
        sc->id = id;
@@ -476,28 +445,14 @@ unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id)
 }
 
 /**
- * lookup_swap_cgroup - lookup mem_cgroup tied to swap entry
+ * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
  * @ent: swap entry to be looked up.
  *
  * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
  */
-unsigned short lookup_swap_cgroup(swp_entry_t ent)
+unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
 {
-       int type = swp_type(ent);
-       unsigned long offset = swp_offset(ent);
-       unsigned long idx = offset / SC_PER_PAGE;
-       unsigned long pos = offset & SC_POS_MASK;
-       struct swap_cgroup_ctrl *ctrl;
-       struct page *mappage;
-       struct swap_cgroup *sc;
-       unsigned short ret;
-
-       ctrl = &swap_cgroup_ctrl[type];
-       mappage = ctrl->map[idx];
-       sc = page_address(mappage);
-       sc += pos;
-       ret = sc->id;
-       return ret;
+       return lookup_swap_cgroup(ent, NULL)->id;
 }
 
 int swap_cgroup_swapon(int type, unsigned long max_pages)
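
With the rename, code outside this file only ever sees the css id; the
struct swap_cgroup pointer stays private to page_cgroup.c. A hypothetical
caller (every name other than lookup_swap_cgroup_id() is made up):

	unsigned short id = lookup_swap_cgroup_id(ent);
	if (!id)
		return;				/* 0 means no mem_cgroup recorded */
	memcg = lookup_memcg_by_css_id(id);	/* hypothetical id->memcg lookup  */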