memcg: use new logic for page stat accounting
author KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Wed, 21 Mar 2012 23:34:25 +0000 (16:34 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Thu, 22 Mar 2012 00:55:01 +0000 (17:55 -0700)
Currently, per-memcg page stat is recorded into per-page_cgroup flags by
duplicating the page's status into those flags.  The reason is that memcg
has a feature to move a page from one group to another, so there is a race
between "move" and "page stat accounting".

Under the current logic, assume CPU-A and CPU-B.  CPU-A does "move" and
CPU-B does "page stat accounting".

When CPU-A goes first:

            CPU-A                           CPU-B
                                    update "struct page" info.
    move_lock_mem_cgroup(memcg)
    see pc->flags
    copy page stat to new group
    overwrite pc->mem_cgroup.
    move_unlock_mem_cgroup(memcg)
                                    move_lock_mem_cgroup(memcg)
                                    set pc->flags
                                    update page stat accounting
                                    move_unlock_mem_cgroup(memcg)

Here, stat accounting is guarded by move_lock_mem_cgroup(): even though the
"move" logic (CPU-A) doesn't see CPU-B's change to the "struct page"
information, CPU-B later sets pc->flags and updates the counters under the
same lock, against the new group, so the accounting stays consistent.

But it is costly to have the same information in both 'struct page' and
'struct page_cgroup'.  And there is a potential problem.

For example, assume we have PG_dirty accounting in memcg.
PG_* is a flag on struct page.
PCG_* is a flag on struct page_cgroup.
(This is just an example; the same problem can be found in any
 kind of page stat accounting.)

  CPU-A                               CPU-B
      TestSet PG_dirty
      (delay)                        TestClear PG_dirty
                                     if (TestClear(PCG_dirty))
                                          memcg->nr_dirty--
      if (TestSet(PCG_dirty))
          memcg->nr_dirty++

Here, memcg->nr_dirty ends up at +1, which is wrong: CPU-B's
TestClear(PCG_dirty) fails because CPU-A has not yet set PCG_dirty, so the
decrement is skipped, and CPU-A then increments for a page that is no
longer dirty.  This race was reported by Greg Thelen <gthelen@google.com>.
Currently only FILE_MAPPED is supported, and fortunately its updates are
serialized by the page table lock, so this is not a real bug, _now_.

Since this potential problem is caused by having duplicated information in
struct page and struct page_cgroup, we may be able to fix it by using only
the original 'struct page' information.  But then we have a problem in
"move account".

Assume we use only PG_dirty.

         CPU-A                   CPU-B
    TestSet PG_dirty
    (delay)                    move_lock_mem_cgroup()
                               if (PageDirty(page))
                                      new_memcg->nr_dirty++
                               pc->mem_cgroup = new_memcg;
                               move_unlock_mem_cgroup()
    move_lock_mem_cgroup()
    memcg = pc->mem_cgroup
    new_memcg->nr_dirty++

The accounting information may be double-counted: CPU-B charges new_memcg
for the dirty page during the move, and then CPU-A, reading pc->mem_cgroup
after the move, increments new_memcg again for the same page.  This was the
original reason to have the PCG_xxx flags, but as shown above, PCG_xxx has
its own problem.

I think we need a bigger lock, as in:

     move_lock_mem_cgroup(page)
     TestSetPageDirty(page)
     update page stats (without any checks)
     move_unlock_mem_cgroup(page)

This fixes both problems, and we don't have to duplicate page flags into
page_cgroup.  Please note: move_lock_mem_cgroup() is taken only when there
is a possibility of "account move" in the system, so in most paths the
status update will go without atomic locks.
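
For reference, the mover's side of this protocol looks roughly like the
sketch below.  This is a simplified rendering of mem_cgroup_move_account,
not code from this patch; the stat index name and the surrounding checks
are abbreviated:

     move_lock_mem_cgroup(from, &flags);
     if (!PageAnon(page) && page_mapped(page)) {
             /* transfer the FILE_MAPPED count from the old group to the new */
             __this_cpu_dec(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
             __this_cpu_inc(to->stat->count[MEM_CGROUP_STAT_FILE_MAPPED]);
     }
     pc->mem_cgroup = to;    /* stat updaters re-read this under the lock */
     move_unlock_mem_cgroup(from, &flags);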

This patch introduces mem_cgroup_begin_update_page_stat() and
mem_cgroup_end_update_page_stat(); both should be called when modifying
'struct page' information that memcg takes care of, as in:

     mem_cgroup_begin_update_page_stat()
     modify page information
     mem_cgroup_update_page_stat()
     => never check any 'struct page' info, just update counters.
     mem_cgroup_end_update_page_stat().
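
As an illustration, a hypothetical PG_dirty accounting path under this
scheme might look like the sketch below.  MEMCG_NR_FILE_DIRTY and the
function name are made up for this example (only FILE_MAPPED is accounted
today):

     void memcg_set_page_dirty(struct page *page)
     {
             bool locked;
             unsigned long flags;

             mem_cgroup_begin_update_page_stat(page, &locked, &flags);
             if (!TestSetPageDirty(page))
                     mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_DIRTY);
             mem_cgroup_end_update_page_stat(page, &locked, &flags);
     }

With this ordering, a "move" can neither slip between the page flag test
and the counter update (the first race) nor between reading pc->mem_cgroup
and charging it (the second race).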

This patch makes the accounting path slower because we need to call
begin_update_page_stat()/end_update_page_stat() regardless of whether the
accounted value will change or not.  A following patch adds an easy
optimization and reduces the cost.

[akpm@linux-foundation.org: s/lock/locked/]
[hughd@google.com: fix deadlock by avoiding stat lock when anon]
Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: Greg Thelen <gthelen@google.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@suse.cz>
Cc: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
Cc: Ying Han <yinghan@google.com>
Signed-off-by: Hugh Dickins <hughd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>

include/linux/memcontrol.h
mm/memcontrol.c
mm/rmap.c

index c54e5df..bf7ae01 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -141,6 +141,31 @@ static inline bool mem_cgroup_disabled(void)
        return false;
 }
 
+void __mem_cgroup_begin_update_page_stat(struct page *page, bool *locked,
+                                        unsigned long *flags);
+
+static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+                                       bool *locked, unsigned long *flags)
+{
+       if (mem_cgroup_disabled())
+               return;
+       rcu_read_lock();
+       *locked = false;
+       return __mem_cgroup_begin_update_page_stat(page, locked, flags);
+}
+
+void __mem_cgroup_end_update_page_stat(struct page *page,
+                               unsigned long *flags);
+static inline void mem_cgroup_end_update_page_stat(struct page *page,
+                                       bool *locked, unsigned long *flags)
+{
+       if (mem_cgroup_disabled())
+               return;
+       if (*locked)
+               __mem_cgroup_end_update_page_stat(page, flags);
+       rcu_read_unlock();
+}
+
 void mem_cgroup_update_page_stat(struct page *page,
                                 enum mem_cgroup_page_stat_item idx,
                                 int val);
@@ -341,6 +366,16 @@ mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
 {
 }
 
+static inline void mem_cgroup_begin_update_page_stat(struct page *page,
+                                       bool *locked, unsigned long *flags)
+{
+}
+
+static inline void mem_cgroup_end_update_page_stat(struct page *page,
+                                       bool *locked, unsigned long *flags)
+{
+}
+
 static inline void mem_cgroup_inc_page_stat(struct page *page,
                                            enum mem_cgroup_page_stat_item idx)
 {
index 8afed28..df1e180 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1910,32 +1910,59 @@ bool mem_cgroup_handle_oom(struct mem_cgroup *memcg, gfp_t mask, int order)
  * If there is, we take a lock.
  */
 
+void __mem_cgroup_begin_update_page_stat(struct page *page,
+                               bool *locked, unsigned long *flags)
+{
+       struct mem_cgroup *memcg;
+       struct page_cgroup *pc;
+
+       pc = lookup_page_cgroup(page);
+again:
+       memcg = pc->mem_cgroup;
+       if (unlikely(!memcg || !PageCgroupUsed(pc)))
+               return;
+       /*
+        * If this memory cgroup is not under account moving, we don't
+        * need to take move_lock_page_cgroup(). Because we already hold
+        * rcu_read_lock(), any calls to move_account will be delayed until
+        * rcu_read_unlock() if mem_cgroup_stealed() == true.
+        */
+       if (!mem_cgroup_stealed(memcg))
+               return;
+
+       move_lock_mem_cgroup(memcg, flags);
+       if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
+               move_unlock_mem_cgroup(memcg, flags);
+               goto again;
+       }
+       *locked = true;
+}
+
+void __mem_cgroup_end_update_page_stat(struct page *page, unsigned long *flags)
+{
+       struct page_cgroup *pc = lookup_page_cgroup(page);
+
+       /*
+        * It's guaranteed that pc->mem_cgroup never changes while the
+        * lock is held, because any routine that modifies pc->mem_cgroup
+        * must take move_lock_page_cgroup().
+        */
+       move_unlock_mem_cgroup(pc->mem_cgroup, flags);
+}
+
 void mem_cgroup_update_page_stat(struct page *page,
                                 enum mem_cgroup_page_stat_item idx, int val)
 {
        struct mem_cgroup *memcg;
        struct page_cgroup *pc = lookup_page_cgroup(page);
-       bool need_unlock = false;
        unsigned long uninitialized_var(flags);
 
        if (mem_cgroup_disabled())
                return;
-again:
-       rcu_read_lock();
+
        memcg = pc->mem_cgroup;
        if (unlikely(!memcg || !PageCgroupUsed(pc)))
-               goto out;
-       /* pc->mem_cgroup is unstable ? */
-       if (unlikely(mem_cgroup_stealed(memcg))) {
-               /* take a lock against to access pc->mem_cgroup */
-               move_lock_mem_cgroup(memcg, &flags);
-               if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
-                       move_unlock_mem_cgroup(memcg, &flags);
-                       rcu_read_unlock();
-                       goto again;
-               }
-               need_unlock = true;
-       }
+               return;
 
        switch (idx) {
        case MEMCG_NR_FILE_MAPPED:
@@ -1950,11 +1977,6 @@ again:
        }
 
        this_cpu_add(memcg->stat->count[idx], val);
-
-out:
-       if (unlikely(need_unlock))
-               move_unlock_mem_cgroup(memcg, &flags);
-       rcu_read_unlock();
 }
 
 /*
index ebeb95e..5b5ad58 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1148,10 +1148,15 @@ void page_add_new_anon_rmap(struct page *page,
  */
 void page_add_file_rmap(struct page *page)
 {
+       bool locked;
+       unsigned long flags;
+
+       mem_cgroup_begin_update_page_stat(page, &locked, &flags);
        if (atomic_inc_and_test(&page->_mapcount)) {
                __inc_zone_page_state(page, NR_FILE_MAPPED);
                mem_cgroup_inc_page_stat(page, MEMCG_NR_FILE_MAPPED);
        }
+       mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /**
@@ -1162,9 +1167,21 @@ void page_add_file_rmap(struct page *page)
  */
 void page_remove_rmap(struct page *page)
 {
+       bool anon = PageAnon(page);
+       bool locked;
+       unsigned long flags;
+
+       /*
+        * The anon case has no mem_cgroup page_stat to update; but may
+        * uncharge_page() below, where the lock ordering can deadlock if
+        * we hold the lock against page_stat move: so avoid it on anon.
+        */
+       if (!anon)
+               mem_cgroup_begin_update_page_stat(page, &locked, &flags);
+
        /* page still mapped by someone else? */
        if (!atomic_add_negative(-1, &page->_mapcount))
-               return;
+               goto out;
 
        /*
         * Now that the last pte has gone, s390 must transfer dirty
@@ -1173,7 +1190,7 @@ void page_remove_rmap(struct page *page)
         * not if it's in swapcache - there might be another pte slot
         * containing the swap entry, but page not yet written to swap.
         */
-       if ((!PageAnon(page) || PageSwapCache(page)) &&
+       if ((!anon || PageSwapCache(page)) &&
            page_test_and_clear_dirty(page_to_pfn(page), 1))
                set_page_dirty(page);
        /*
@@ -1181,8 +1198,8 @@ void page_remove_rmap(struct page *page)
         * and not charged by memcg for now.
         */
        if (unlikely(PageHuge(page)))
-               return;
-       if (PageAnon(page)) {
+               goto out;
+       if (anon) {
                mem_cgroup_uncharge_page(page);
                if (!PageTransHuge(page))
                        __dec_zone_page_state(page, NR_ANON_PAGES);
@@ -1202,6 +1219,9 @@ void page_remove_rmap(struct page *page)
         * Leaving it set also helps swapoff to reinstate ptes
         * faster for those pages still in swapcache.
         */
+out:
+       if (!anon)
+               mem_cgroup_end_update_page_stat(page, &locked, &flags);
 }
 
 /*