drm/radeon: disable MSI on RV515
[linux-flexiantxendom0.git] / mm / swap.c
index 1b9e4eb..55b266d 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -21,7 +21,7 @@
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/init.h>
-#include <linux/module.h>
+#include <linux/export.h>
 #include <linux/mm_inline.h>
 #include <linux/buffer_head.h> /* for try_to_release_page() */
 #include <linux/percpu_counter.h>
@@ -78,39 +78,22 @@ static void put_compound_page(struct page *page)
 {
        if (unlikely(PageTail(page))) {
                /* __split_huge_page_refcount can run under us */
-               struct page *page_head = page->first_page;
-               smp_rmb();
-               /*
-                * If PageTail is still set after smp_rmb() we can be sure
-                * that the page->first_page we read wasn't a dangling pointer.
-                * See __split_huge_page_refcount() smp_wmb().
-                */
-               if (likely(PageTail(page) && get_page_unless_zero(page_head))) {
+               struct page *page_head = compound_trans_head(page);
+
+               if (likely(page != page_head &&
+                          get_page_unless_zero(page_head))) {
                        unsigned long flags;
                        /*
-                        * Verify that our page_head wasn't converted
-                        * to a a regular page before we got a
-                        * reference on it.
+                        * page_head wasn't a dangling pointer but it
+                        * may not be a head page anymore by the time
+                        * we obtain the lock. That is ok as long as it
+                        * can't be freed from under us.
                         */
-                       if (unlikely(!PageHead(page_head))) {
-                               /* PageHead is cleared after PageTail */
-                               smp_rmb();
-                               VM_BUG_ON(PageTail(page));
-                               goto out_put_head;
-                       }
-                       /*
-                        * Only run compound_lock on a valid PageHead,
-                        * after having it pinned with
-                        * get_page_unless_zero() above.
-                        */
-                       smp_mb();
-                       /* page_head wasn't a dangling pointer */
                        flags = compound_lock_irqsave(page_head);
                        if (unlikely(!PageTail(page))) {
                                /* __split_huge_page_refcount run before us */
                                compound_unlock_irqrestore(page_head, flags);
                                VM_BUG_ON(PageHead(page_head));
-                       out_put_head:
                                if (put_page_testzero(page_head))
                                        __put_single_page(page_head);
                        out_put_single:
@@ -121,16 +104,17 @@ static void put_compound_page(struct page *page)
                        VM_BUG_ON(page_head != page->first_page);
                        /*
                         * We can release the refcount taken by
-                        * get_page_unless_zero now that
-                        * split_huge_page_refcount is blocked on the
-                        * compound_lock.
+                        * get_page_unless_zero() now that
+                        * __split_huge_page_refcount() is blocked on
+                        * the compound_lock.
                         */
                        if (put_page_testzero(page_head))
                                VM_BUG_ON(1);
                        /* __split_huge_page_refcount will wait now */
-                       VM_BUG_ON(atomic_read(&page->_count) <= 0);
-                       atomic_dec(&page->_count);
+                       VM_BUG_ON(page_mapcount(page) <= 0);
+                       atomic_dec(&page->_mapcount);
                        VM_BUG_ON(atomic_read(&page_head->_count) <= 0);
+                       VM_BUG_ON(atomic_read(&page->_count) != 0);
                        compound_unlock_irqrestore(page_head, flags);
                        if (put_page_testzero(page_head)) {
                                if (PageHead(page_head))
@@ -160,6 +144,45 @@ void put_page(struct page *page)
 }
 EXPORT_SYMBOL(put_page);
 
+/*
+ * This function is exported but must not be called by anything other
+ * than get_page(). It implements the slow path of get_page().
+ */
+bool __get_page_tail(struct page *page)
+{
+       /*
+        * This takes care of get_page() if run on a tail page
+        * returned by one of the get_user_pages/follow_page variants.
+        * get_user_pages/follow_page itself doesn't need the compound
+        * lock because it runs __get_page_tail_foll() under the
+        * proper PT lock that already serializes against
+        * split_huge_page().
+        */
+       unsigned long flags;
+       bool got = false;
+       struct page *page_head = compound_trans_head(page);
+
+       if (likely(page != page_head && get_page_unless_zero(page_head))) {
+               /*
+                * page_head wasn't a dangling pointer but it
+                * may not be a head page anymore by the time
+                * we obtain the lock. That is ok as long as it
+                * can't be freed from under us.
+                */
+               flags = compound_lock_irqsave(page_head);
+               /* here __split_huge_page_refcount won't run anymore */
+               if (likely(PageTail(page))) {
+                       __get_page_tail_foll(page, false);
+                       got = true;
+               }
+               compound_unlock_irqrestore(page_head, flags);
+               if (unlikely(!got))
+                       put_page(page_head);
+       }
+       return got;
+}
+EXPORT_SYMBOL(__get_page_tail);
+
 /**
  * put_pages_list() - release a list of pages
  * @pages: list of pages threaded on page->lru
@@ -179,15 +202,13 @@ void put_pages_list(struct list_head *pages)
 }
 EXPORT_SYMBOL(put_pages_list);
 
-/*
- * pagevec_move_tail() must be called with IRQ disabled.
- * Otherwise this may cause nasty races.
- */
-static void pagevec_move_tail(struct pagevec *pvec)
+static void pagevec_lru_move_fn(struct pagevec *pvec,
+                               void (*move_fn)(struct page *page, void *arg),
+                               void *arg)
 {
        int i;
-       int pgmoved = 0;
        struct zone *zone = NULL;
+       unsigned long flags = 0;
 
        for (i = 0; i < pagevec_count(pvec); i++) {
                struct page *page = pvec->pages[i];
@@ -195,30 +216,50 @@ static void pagevec_move_tail(struct pagevec *pvec)
 
                if (pagezone != zone) {
                        if (zone)
-                               spin_unlock(&zone->lru_lock);
+                               spin_unlock_irqrestore(&zone->lru_lock, flags);
                        zone = pagezone;
-                       spin_lock(&zone->lru_lock);
-               }
-               if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
-                       enum lru_list lru = page_lru_base_type(page);
-                       list_move_tail(&page->lru, &zone->lru[lru].list);
-                       mem_cgroup_rotate_reclaimable_page(page);
-                       pgmoved++;
+                       spin_lock_irqsave(&zone->lru_lock, flags);
                }
+
+               (*move_fn)(page, arg);
        }
        if (zone)
-               spin_unlock(&zone->lru_lock);
-       __count_vm_events(PGROTATED, pgmoved);
+               spin_unlock_irqrestore(&zone->lru_lock, flags);
        release_pages(pvec->pages, pvec->nr, pvec->cold);
        pagevec_reinit(pvec);
 }
 
+static void pagevec_move_tail_fn(struct page *page, void *arg)
+{
+       int *pgmoved = arg;
+       struct zone *zone = page_zone(page);
+
+       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+               enum lru_list lru = page_lru_base_type(page);
+               list_move_tail(&page->lru, &zone->lru[lru].list);
+               mem_cgroup_rotate_reclaimable_page(page);
+               (*pgmoved)++;
+       }
+}
+
+/*
+ * pagevec_move_tail() must be called with IRQ disabled.
+ * Otherwise this may cause nasty races.
+ */
+static void pagevec_move_tail(struct pagevec *pvec)
+{
+       int pgmoved = 0;
+
+       pagevec_lru_move_fn(pvec, pagevec_move_tail_fn, &pgmoved);
+       __count_vm_events(PGROTATED, pgmoved);
+}
+
 /*
  * Writeback is about to end against a page which has been marked for immediate
  * reclaim.  If it still appears to be reclaimable, move it to the tail of the
  * inactive list.
  */
-void  rotate_reclaimable_page(struct page *page)
+void rotate_reclaimable_page(struct page *page)
 {
        if (!PageLocked(page) && !PageDirty(page) && !PageActive(page) &&
            !PageUnevictable(page) && PageLRU(page)) {
@@ -254,14 +295,10 @@ static void update_page_reclaim_stat(struct zone *zone, struct page *page,
                memcg_reclaim_stat->recent_rotated[file]++;
 }
 
-/*
- * FIXME: speed this up?
- */
-void activate_page(struct page *page)
+static void __activate_page(struct page *page, void *arg)
 {
        struct zone *zone = page_zone(page);
 
-       spin_lock_irq(&zone->lru_lock);
        if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
                int file = page_is_file_cache(page);
                int lru = page_lru_base_type(page);
@@ -274,8 +311,45 @@ void activate_page(struct page *page)
 
                update_page_reclaim_stat(zone, page, file, 1);
        }
+}
+
+#ifdef CONFIG_SMP
+static DEFINE_PER_CPU(struct pagevec, activate_page_pvecs);
+
+static void activate_page_drain(int cpu)
+{
+       struct pagevec *pvec = &per_cpu(activate_page_pvecs, cpu);
+
+       if (pagevec_count(pvec))
+               pagevec_lru_move_fn(pvec, __activate_page, NULL);
+}
+
+void activate_page(struct page *page)
+{
+       if (PageLRU(page) && !PageActive(page) && !PageUnevictable(page)) {
+               struct pagevec *pvec = &get_cpu_var(activate_page_pvecs);
+
+               page_cache_get(page);
+               if (!pagevec_add(pvec, page))
+                       pagevec_lru_move_fn(pvec, __activate_page, NULL);
+               put_cpu_var(activate_page_pvecs);
+       }
+}
+
+#else
+static inline void activate_page_drain(int cpu)
+{
+}
+
+void activate_page(struct page *page)
+{
+       struct zone *zone = page_zone(page);
+
+       spin_lock_irq(&zone->lru_lock);
+       __activate_page(page, NULL);
        spin_unlock_irq(&zone->lru_lock);
 }
+#endif
 
 /*
  * Mark a page as having seen activity.
@@ -354,54 +428,68 @@ void add_page_to_unevictable_list(struct page *page)
  * head of the list, rather than the tail, to give the flusher
  * threads some time to write it out, as this is much more
  * effective than the single-page writeout from reclaim.
+ *
+ * If the page isn't page_mapped and dirty/writeback, the page
+ * could reclaim asap using PG_reclaim.
+ *
+ * 1. active, mapped page -> none
+ * 2. active, dirty/writeback page -> inactive, head, PG_reclaim
+ * 3. inactive, mapped page -> none
+ * 4. inactive, dirty/writeback page -> inactive, head, PG_reclaim
+ * 5. inactive, clean -> inactive, tail
+ * 6. Others -> none
+ *
+ * In 4, why it moves inactive's head, the VM expects the page would
+ * be write it out by flusher threads as this is much more effective
+ * than the single-page writeout from reclaim.
  */
-static void lru_deactivate(struct page *page, struct zone *zone)
+static void lru_deactivate_fn(struct page *page, void *arg)
 {
        int lru, file;
+       bool active;
+       struct zone *zone = page_zone(page);
+
+       if (!PageLRU(page))
+               return;
 
-       if (!PageLRU(page) || !PageActive(page))
+       if (PageUnevictable(page))
                return;
 
        /* Some processes are using the page */
        if (page_mapped(page))
                return;
 
+       active = PageActive(page);
+
        file = page_is_file_cache(page);
        lru = page_lru_base_type(page);
-       del_page_from_lru_list(zone, page, lru + LRU_ACTIVE);
+       del_page_from_lru_list(zone, page, lru + active);
        ClearPageActive(page);
        ClearPageReferenced(page);
        add_page_to_lru_list(zone, page, lru);
-       __count_vm_event(PGDEACTIVATE);
 
-       update_page_reclaim_stat(zone, page, file, 0);
-}
-
-static void ____pagevec_lru_deactivate(struct pagevec *pvec)
-{
-       int i;
-       struct zone *zone = NULL;
-
-       for (i = 0; i < pagevec_count(pvec); i++) {
-               struct page *page = pvec->pages[i];
-               struct zone *pagezone = page_zone(page);
-
-               if (pagezone != zone) {
-                       if (zone)
-                               spin_unlock_irq(&zone->lru_lock);
-                       zone = pagezone;
-                       spin_lock_irq(&zone->lru_lock);
-               }
-               lru_deactivate(page, zone);
+       if (PageWriteback(page) || PageDirty(page)) {
+               /*
+                * PG_reclaim could be raced with end_page_writeback
+                * It can make readahead confusing.  But race window
+                * is _really_ small and  it's non-critical problem.
+                */
+               SetPageReclaim(page);
+       } else {
+               /*
+                * The page's writeback ends up during pagevec
+                * We moves tha page into tail of inactive.
+                */
+               list_move_tail(&page->lru, &zone->lru[lru].list);
+               mem_cgroup_rotate_reclaimable_page(page);
+               __count_vm_event(PGROTATED);
        }
-       if (zone)
-               spin_unlock_irq(&zone->lru_lock);
 
-       release_pages(pvec->pages, pvec->nr, pvec->cold);
-       pagevec_reinit(pvec);
+       if (active)
+               __count_vm_event(PGDEACTIVATE);
+       update_page_reclaim_stat(zone, page, file, 0);
 }
 
-
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -431,7 +519,9 @@ static void drain_cpu_pagevecs(int cpu)
 
        pvec = &per_cpu(lru_deactivate_pvecs, cpu);
        if (pagevec_count(pvec))
-               ____pagevec_lru_deactivate(pvec);
+               pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
+       activate_page_drain(cpu);
 }
 
 /**
@@ -444,11 +534,18 @@ static void drain_cpu_pagevecs(int cpu)
  */
 void deactivate_page(struct page *page)
 {
+       /*
+        * In a workload with many unevictable page such as mprotect, unevictable
+        * page deactivation for accelerating reclaim is pointless.
+        */
+       if (PageUnevictable(page))
+               return;
+
        if (likely(get_page_unless_zero(page))) {
                struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
 
                if (!pagevec_add(pvec, page))
-                       ____pagevec_lru_deactivate(pvec);
+                       pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
                put_cpu_var(lru_deactivate_pvecs);
        }
 }
@@ -570,7 +667,7 @@ void lru_add_page_tail(struct zone* zone,
        VM_BUG_ON(!PageHead(page));
        VM_BUG_ON(PageCompound(page_tail));
        VM_BUG_ON(PageLRU(page_tail));
-       VM_BUG_ON(!spin_is_locked(&zone->lru_lock));
+       VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&zone->lru_lock));
 
        SetPageLRU(page_tail);
 
@@ -595,44 +692,33 @@ void lru_add_page_tail(struct zone* zone,
        }
 }
 
+static void ____pagevec_lru_add_fn(struct page *page, void *arg)
+{
+       enum lru_list lru = (enum lru_list)arg;
+       struct zone *zone = page_zone(page);
+       int file = is_file_lru(lru);
+       int active = is_active_lru(lru);
+
+       VM_BUG_ON(PageActive(page));
+       VM_BUG_ON(PageUnevictable(page));
+       VM_BUG_ON(PageLRU(page));
+
+       SetPageLRU(page);
+       if (active)
+               SetPageActive(page);
+       update_page_reclaim_stat(zone, page, file, active);
+       add_page_to_lru_list(zone, page, lru);
+}
+
 /*
  * Add the passed pages to the LRU, then drop the caller's refcount
  * on them.  Reinitialises the caller's pagevec.
  */
 void ____pagevec_lru_add(struct pagevec *pvec, enum lru_list lru)
 {
-       int i;
-       struct zone *zone = NULL;
-
        VM_BUG_ON(is_unevictable_lru(lru));
 
-       for (i = 0; i < pagevec_count(pvec); i++) {
-               struct page *page = pvec->pages[i];
-               struct zone *pagezone = page_zone(page);
-               int file;
-               int active;
-
-               if (pagezone != zone) {
-                       if (zone)
-                               spin_unlock_irq(&zone->lru_lock);
-                       zone = pagezone;
-                       spin_lock_irq(&zone->lru_lock);
-               }
-               VM_BUG_ON(PageActive(page));
-               VM_BUG_ON(PageUnevictable(page));
-               VM_BUG_ON(PageLRU(page));
-               SetPageLRU(page);
-               active = is_active_lru(lru);
-               file = is_file_lru(lru);
-               if (active)
-                       SetPageActive(page);
-               update_page_reclaim_stat(zone, page, file, active);
-               add_page_to_lru_list(zone, page, lru);
-       }
-       if (zone)
-               spin_unlock_irq(&zone->lru_lock);
-       release_pages(pvec->pages, pvec->nr, pvec->cold);
-       pagevec_reinit(pvec);
+       pagevec_lru_move_fn(pvec, ____pagevec_lru_add_fn, (void *)lru);
 }
 
 EXPORT_SYMBOL(____pagevec_lru_add);