drm/radeon: disable MSI on RV515
[linux-flexiantxendom0.git] / mm / swap.c
index bbc1ce9..55b266d 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -21,7 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h> /* for try_to_release_page() */
 #include <linux/percpu_counter.h>
@@ -39,6 +39,7 @@ int page_cluster;
 
 static DEFINE_PER_CPU(struct pagevec[NR_LRU_LISTS], lru_add_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -77,39 +78,22 @@ static void put_compound_page(struct page *page)
 {
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
-               struct page *page_head = page->first_page;
-               smp_rmb();
-               /*
-                * If PageTail is still set after smp_rmb() we can be sure
-                * that the page->first_page we read wasn't a dangling pointer.
-                * See __split_huge_page_refcount() smp_wmb().
-                */
-               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+               struct page *page_head = compound_trans_head(page);
+
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
                        unsigned long flags;
                        /*
-                        * Verify that our page_head wasn't converted
-                        * to a a regular page before we got a
-                        * reference on it.
-                        */
-                       if (unlikely(!PageHead(page_head))) {
-                               /* PageHead is cleared after PageTail */
-                               smp_rmb();
-                               VM_BUG_ON(PageTail(page));
-                               goto out_put_head;
-                       }
-                       /*
-                        * Only run compound_lock on a valid PageHead,
-                        * after having it pinned with
-                        * get_page_unless_zero() above.
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+                        * we obtain the lock. That is ok as long as it
+                        * can't be freed from under us.
                         */
-                       smp_mb();
-                       /* page_head wasn't a dangling pointer */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
-                       out_put_head:
                                if (put_page_testzero(page_head))
                                        __put_single_page(page_head);
                        out_put_single:
@@ -120,16 +104,17 @@ static void put_compound_page(struct page *page)
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
-                        * get_page_unless_zero now that
-                        * split_huge_page_refcount is blocked on the
-                        * compound_lock.
+                        * get_page_unless_zero() now that
+                        * __split_huge_page_refcount() is blocked on
+                        * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
-                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
-                       atomic_dec(&page->_count);
+                       VM_BUG_ON(page_mapcount(page) <= 0);
+                       atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
@@ -159,6 +144,45 @@ void put_page(struct page *page)
 }
 EXPORT_SYMBOL(put_page);
 
+/*
+ * This function is exported but must not be called by anything other
+ * than get_page(). It implements the slow path of get_page().
+ */
+bool __get_page_tail(struct page *page)
+{
+       /*
+        * This takes care of get_page() if run on a tail page
+        * returned by one of the get_user_pages/follow_page variants.
+        * get_user_pages/follow_page itself doesn't need the compound
+        * lock because it runs __get_page_tail_foll() under the
+        * proper PT lock that already serializes against
+        * split_huge_page().
+        */
+       unsigned long flags;
+       bool got = false;
+       struct page *page_head = compound_trans_head(page);
+
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
+               }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
+       }
+       return got;
+}
+EXPORT_SYMBOL(__get_page_tail);
+
 /**
  * put_pages_list() - release a list of pages
  * @pages: list of pages threaded on page->lru
@@ -201,7 +225,7 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
        }
        if (zone)
                spin_unlock_irqrestore(&zone->lru_lock, flags);
-       release_pages(pvec->pages, pagevec_count(pvec), pvec->cold);
+       release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
 }
 
@@ -211,8 +235,9 @@ static void pagevec_move_tail_fn(struct page *page, void *arg)
        struct zone *zone = page_zone(page);
 
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-               int lru = page_lru_base_type(page);
+               enum lru_list lru = page_lru_base_type(page);
                list_move_tail(&page->lru, &zone->lru[lru].list);
+               mem_cgroup_rotate_reclaimable_page(page);
                (*pgmoved)++;
        }
 }
@@ -270,41 +295,22 @@ static void update_page_reclaim_stat(struct zone *zone, struct page *page,
                memcg_reclaim_stat->recent_rotated[file]++;
 }
 
-/*
- * A page will go to active list either by activate_page or putback_lru_page.
- * In the activate_page case, the page hasn't active bit set. The page might
- * not in LRU list because it's isolated before it gets a chance to be moved to
- * active list. The window is small because pagevec just stores several pages.
- * For such case, we do nothing for such page.
- * In the putback_lru_page case, the page isn't in lru list but has active
- * bit set
- */
 static void __activate_page(struct page *page, void *arg)
 {
        struct zone *zone = page_zone(page);
-       int file = page_is_file_cache(page);
-       int lru = page_lru_base_type(page);
-       bool putback = !PageLRU(page);
 
-       /* The page is isolated before it's moved to active list */
-       if (!PageLRU(page) && !PageActive(page))
-               return;
-       if ((PageLRU(page) && PageActive(page)) || PageUnevictable(page))
-               return;
-
-       if (!putback)
+       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+               int file = page_is_file_cache(page);
+               int lru = page_lru_base_type(page);
                del_page_from_lru_list(zone, page, lru);
-       else
-               SetPageLRU(page);
 
-       SetPageActive(page);
-       lru += LRU_ACTIVE;
-       add_page_to_lru_list(zone, page, lru);
+               SetPageActive(page);
+               lru += LRU_ACTIVE;
+               add_page_to_lru_list(zone, page, lru);
+               __count_vm_event(PGACTIVATE);
 
-       if (putback)
-               return;
-       __count_vm_event(PGACTIVATE);
-       update_page_reclaim_stat(zone, page, file, 1);
+               update_page_reclaim_stat(zone, page, file, 1);
+       }
 }
 
 #ifdef CONFIG_SMP
@@ -330,20 +336,6 @@ void activate_page(struct page *page)
        }
 }
 
-/* Caller should hold zone->lru_lock */
-int putback_active_lru_page(struct zone *zone, struct page *page)
-{
-       struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
-
-       if (!pagevec_add(pvec, page)) {
-               spin_unlock_irq(&zone->lru_lock);
-               pagevec_lru_move_fn(pvec, __activate_page, NULL);
-               spin_lock_irq(&zone->lru_lock);
-       }
-       put_cpu_var(activate_page_pvecs);
-       return 1;
-}
-
 #else
 static inline void activate_page_drain(int cpu)
 {
@@ -354,8 +346,7 @@ void activate_page(struct page *page)
        struct zone *zone = page_zone(page);
 
        spin_lock_irq(&zone->lru_lock);
-       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page))
-               __activate_page(page, NULL);
+       __activate_page(page, NULL);
        spin_unlock_irq(&zone->lru_lock);
 }
 #endif
@@ -432,6 +423,74 @@ void add_page_to_unevictable_list(struct page *page)
 }
 
 /*
+ * If the page can not be invalidated, it is moved to the
+ * inactive list to speed up its reclaim.  It is moved to the
+ * head of the list, rather than the tail, to give the flusher
+ * threads some time to write it out, as this is much more
+ * effective than the single-page writeout from reclaim.
+ *
+ * If the page isn't page_mapped and is dirty or under writeback, the
+ * page can be reclaimed as soon as possible using PG_reclaim.
+ *
+ * 1. active, mapped page -> none
+ * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
+ * 3. inactive, mapped page -> none
+ * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
+ * 5. inactive, clean -> inactive, tail
+ * 6. Others -> none
+ *
+ * In case 4, the page is moved to the head of the inactive list because
+ * the VM expects the flusher threads to write it out, which is much more
+ * effective than the single-page writeout from reclaim.
+ */
+static void lru_deactivate_fn(struct page *page, void *arg)
+{
+       int lru, file;
+       bool active;
+       struct zone *zone = page_zone(page);
+
+       if (!PageLRU(page))
+               return;
+
+       if (PageUnevictable(page))
+               return;
+
+       /* Some processes are using the page */
+       if (page_mapped(page))
+               return;
+
+       active = PageActive(page);
+
+       file = page_is_file_cache(page);
+       lru = page_lru_base_type(page);
+       del_page_from_lru_list(zone, page, lru + active);
+       ClearPageActive(page);
+       ClearPageReferenced(page);
+       add_page_to_lru_list(zone, page, lru);
+
+       if (PageWriteback(page) || PageDirty(page)) {
+               /*
+                * Setting PG_reclaim can race with end_page_writeback(),
+                * which can confuse readahead.  But the race window is
+                * _really_ small and it's a non-critical problem.
+                */
+               SetPageReclaim(page);
+       } else {
+               /*
+                * The page's writeback completed while it was in the pagevec.
+                * We move the page to the tail of the inactive list.
+                */
+               list_move_tail(&page->lru, &zone->lru[lru].list);
+               mem_cgroup_rotate_reclaimable_page(page);
+               __count_vm_event(PGROTATED);
+       }
+
+       if (active)
+               __count_vm_event(PGDEACTIVATE);
+       update_page_reclaim_stat(zone, page, file, 0);
+}
+
+/*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
  * disabled; or "cpu" is being hot-unplugged, and is already dead.
@@ -457,9 +516,40 @@ static void drain_cpu_pagevecs(int cpu)
                pagevec_move_tail(pvec);
                local_irq_restore(flags);
        }
+
+       pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+       if (pagevec_count(pvec))
+               pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
        activate_page_drain(cpu);
 }
 
+/**
+ * deactivate_page - forcefully deactivate a page
+ * @page: page to deactivate
+ *
+ * This function hints the VM that @page is a good reclaim candidate,
+ * for example if its invalidation fails due to the page being dirty
+ * or under writeback.
+ */
+void deactivate_page(struct page *page)
+{
+       /*
+        * In a workload with many unevictable pages, such as mprotect,
+        * deactivating unevictable pages to accelerate reclaim is pointless.
+        */
+       if (PageUnevictable(page))
+               return;
+
+       if (likely(get_page_unless_zero(page))) {
+               struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+               if (!pagevec_add(pvec, page))
+                       pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+               put_cpu_var(lru_deactivate_pvecs);
+       }
+}
+
 void lru_add_drain(void)
 {
        drain_cpu_pagevecs(get_cpu());
@@ -577,7 +667,7 @@ void lru_add_page_tail(struct zone* zone,
        VM_BUG_ON(!PageHead(page));
        VM_BUG_ON(PageCompound(page_tail));
        VM_BUG_ON(PageLRU(page_tail));
-       VM_BUG_ON(!spin_is_locked(&zone->lru_lock));
+       VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));
 
        SetPageLRU(page_tail);