#include <linux/swap.h>
#include <linux/ksm.h>
#include <linux/hash.h>
+#include <linux/freezer.h>
+#include <linux/oom.h>
#include <asm/tlbflush.h>
#include "internal.h"
return rmap_item->address & STABLE_FLAG;
}
-static void hold_anon_vma(struct rmap_item *rmap_item,
- struct anon_vma *anon_vma)
-{
- rmap_item->anon_vma = anon_vma;
- get_anon_vma(anon_vma);
-}
-
-static void ksm_drop_anon_vma(struct rmap_item *rmap_item)
-{
- struct anon_vma *anon_vma = rmap_item->anon_vma;
-
- drop_anon_vma(anon_vma);
-}
-
/*
* ksmd, and unmerge_and_remove_all_rmap_items(), must not touch an mm's
* page tables after it has passed through ksm_exit() - which, if necessary,
return (ret & VM_FAULT_OOM) ? -ENOMEM : 0;
}
+static struct vm_area_struct *find_mergeable_vma(struct mm_struct *mm,
+ unsigned long addr)
+{
+ struct vm_area_struct *vma;
+ if (ksm_test_exit(mm))
+ return NULL;
+ vma = find_vma(mm, addr);
+ if (!vma || vma->vm_start > addr)
+ return NULL;
+ if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+ return NULL;
+ return vma;
+}
+
static void break_cow(struct rmap_item *rmap_item)
{
struct mm_struct *mm = rmap_item->mm;
* It is not an accident that whenever we want to break COW
* to undo, we also need to drop a reference to the anon_vma.
*/
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
down_read(&mm->mmap_sem);
- if (ksm_test_exit(mm))
- goto out;
- vma = find_vma(mm, addr);
- if (!vma || vma->vm_start > addr)
- goto out;
- if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
- goto out;
- break_ksm(vma, addr);
-out:
+ vma = find_mergeable_vma(mm, addr);
+ if (vma)
+ break_ksm(vma, addr);
up_read(&mm->mmap_sem);
}
+static struct page *page_trans_compound_anon(struct page *page)
+{
+ if (PageTransCompound(page)) {
+ struct page *head = compound_trans_head(page);
+ /*
+ * head may actually be splitted and freed from under
+ * us but it's ok here.
+ */
+ if (PageAnon(head))
+ return head;
+ }
+ return NULL;
+}
+
static struct page *get_mergeable_page(struct rmap_item *rmap_item)
{
struct mm_struct *mm = rmap_item->mm;
struct page *page;
down_read(&mm->mmap_sem);
- if (ksm_test_exit(mm))
- goto out;
- vma = find_vma(mm, addr);
- if (!vma || vma->vm_start > addr)
- goto out;
- if (!(vma->vm_flags & VM_MERGEABLE) || !vma->anon_vma)
+ vma = find_mergeable_vma(mm, addr);
+ if (!vma)
goto out;
page = follow_page(vma, addr, FOLL_GET);
if (IS_ERR_OR_NULL(page))
goto out;
- if (PageAnon(page)) {
+ if (PageAnon(page) || page_trans_compound_anon(page)) {
flush_anon_page(vma, page, addr);
flush_dcache_page(page);
} else {
ksm_pages_sharing--;
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
rmap_item->address &= PAGE_MASK;
cond_resched();
}
else
ksm_pages_shared--;
- ksm_drop_anon_vma(rmap_item);
+ put_anon_vma(rmap_item->anon_vma);
rmap_item->address &= PAGE_MASK;
} else if (rmap_item->address & UNSTABLE_FLAG) {
static u32 calc_checksum(struct page *page)
{
u32 checksum;
- void *addr = kmap_atomic(page, KM_USER0);
+ void *addr = kmap_atomic(page);
checksum = jhash2(addr, PAGE_SIZE / 4, 17);
- kunmap_atomic(addr, KM_USER0);
+ kunmap_atomic(addr);
return checksum;
}
char *addr1, *addr2;
int ret;
- addr1 = kmap_atomic(page1, KM_USER0);
- addr2 = kmap_atomic(page2, KM_USER1);
+ addr1 = kmap_atomic(page1);
+ addr2 = kmap_atomic(page2);
ret = memcmp(addr1, addr2, PAGE_SIZE);
- kunmap_atomic(addr2, KM_USER1);
- kunmap_atomic(addr1, KM_USER0);
+ kunmap_atomic(addr2);
+ kunmap_atomic(addr1);
return ret;
}
if (addr == -EFAULT)
goto out;
+ BUG_ON(PageTransCompound(page));
ptep = page_check_address(page, mm, addr, &ptl, 0);
if (!ptep)
goto out;
- if (pte_write(*ptep)) {
+ if (pte_write(*ptep) || pte_dirty(*ptep)) {
pte_t entry;
swapped = PageSwapCache(page);
flush_cache_page(vma, addr, page_to_pfn(page));
/*
- * Ok this is tricky, when get_user_pages_fast() run it doesnt
+ * Ok this is tricky, when get_user_pages_fast() run it doesn't
* take any lock, therefore the check that we are going to make
* with the pagecount against the mapcount is racey and
* O_DIRECT can happen right after the check.
set_pte_at(mm, addr, ptep, entry);
goto out_unlock;
}
- entry = pte_wrprotect(entry);
+ if (pte_dirty(entry))
+ set_page_dirty(page);
+ entry = pte_mkclean(pte_wrprotect(entry));
set_pte_at_notify(mm, addr, ptep, entry);
}
*orig_pte = *ptep;
goto out;
pmd = pmd_offset(pud, addr);
+ BUG_ON(pmd_trans_huge(*pmd));
if (!pmd_present(*pmd))
goto out;
set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot));
page_remove_rmap(page);
+ if (!page_mapped(page))
+ try_to_free_swap(page);
put_page(page);
pte_unmap_unlock(ptep, ptl);
return err;
}
+static int page_trans_compound_anon_split(struct page *page)
+{
+ int ret = 0;
+ struct page *transhuge_head = page_trans_compound_anon(page);
+ if (transhuge_head) {
+ /* Get the reference on the head to split it. */
+ if (get_page_unless_zero(transhuge_head)) {
+ /*
+ * Recheck we got the reference while the head
+ * was still anonymous.
+ */
+ if (PageAnon(transhuge_head))
+ ret = split_huge_page(transhuge_head);
+ else
+ /*
+ * Retry later if split_huge_page run
+ * from under us.
+ */
+ ret = 1;
+ put_page(transhuge_head);
+ } else
+ /* Retry later if split_huge_page run from under us. */
+ ret = 1;
+ }
+ return ret;
+}
+
/*
* try_to_merge_one_page - take two pages and merge them into one
* @vma: the vma that holds the pte pointing to page
if (!(vma->vm_flags & VM_MERGEABLE))
goto out;
+ if (PageTransCompound(page) && page_trans_compound_anon_split(page))
+ goto out;
+ BUG_ON(PageTransCompound(page));
if (!PageAnon(page))
goto out;
goto out;
/* Must get reference to anon_vma while still holding mmap_sem */
- hold_anon_vma(rmap_item, vma->anon_vma);
+ rmap_item->anon_vma = vma->anon_vma;
+ get_anon_vma(vma->anon_vma);
out:
up_read(&mm->mmap_sem);
return err;
slot = ksm_scan.mm_slot;
if (slot == &ksm_mm_head) {
+ /*
+ * A number of pages can hang around indefinitely on per-cpu
+ * pagevecs, raised page count preventing write_protect_page
+ * from merging them. Though it doesn't really matter much,
+ * it is puzzling to see some stuck in pages_volatile until
+ * other activity jostles them out, and they also prevented
+ * LTP's KSM test from succeeding deterministically; so drain
+ * them here (here rather than on entry to ksm_do_scan(),
+ * so we don't IPI too often when pages_to_scan is set low).
+ */
+ lru_add_drain_all();
+
root_unstable_tree = RB_ROOT;
spin_lock(&ksm_mmlist_lock);
slot = list_entry(slot->mm_list.next, struct mm_slot, mm_list);
ksm_scan.mm_slot = slot;
spin_unlock(&ksm_mmlist_lock);
+ /*
+ * Although we tested list_empty() above, a racing __ksm_exit
+ * of the last mm on the list may have removed it since then.
+ */
+ if (slot == &ksm_mm_head)
+ return NULL;
next_mm:
ksm_scan.address = 0;
ksm_scan.rmap_list = &slot->rmap_list;
if (ksm_test_exit(mm))
break;
*page = follow_page(vma, ksm_scan.address, FOLL_GET);
- if (!IS_ERR_OR_NULL(*page) && PageAnon(*page)) {
+ if (IS_ERR_OR_NULL(*page)) {
+ ksm_scan.address += PAGE_SIZE;
+ cond_resched();
+ continue;
+ }
+ if (PageAnon(*page) ||
+ page_trans_compound_anon(*page)) {
flush_anon_page(vma, *page, ksm_scan.address);
flush_dcache_page(*page);
rmap_item = get_next_rmap_item(slot,
up_read(&mm->mmap_sem);
return rmap_item;
}
- if (!IS_ERR_OR_NULL(*page))
- put_page(*page);
+ put_page(*page);
ksm_scan.address += PAGE_SIZE;
cond_resched();
}
struct rmap_item *rmap_item;
struct page *uninitialized_var(page);
- while (scan_npages--) {
+ while (scan_npages-- && likely(!freezing(current))) {
cond_resched();
rmap_item = scan_get_next_rmap_item(&page);
if (!rmap_item)
static int ksm_scan_thread(void *nothing)
{
+ set_freezable();
set_user_nice(current, 5);
while (!kthread_should_stop()) {
ksm_do_scan(ksm_thread_pages_to_scan);
mutex_unlock(&ksm_thread_mutex);
+ try_to_freeze();
+
if (ksmd_should_run()) {
schedule_timeout_interruptible(
msecs_to_jiffies(ksm_thread_sleep_millisecs));
} else {
- wait_event_interruptible(ksm_thread_wait,
+ wait_event_freezable(ksm_thread_wait,
ksmd_should_run() || kthread_should_stop());
}
}
{
struct page *new_page;
- unlock_page(page); /* any racers will COW it, not modify it */
-
new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
if (new_page) {
copy_user_highpage(new_page, page, address, vma);
add_page_to_unevictable_list(new_page);
}
- page_cache_release(page);
return new_page;
}
/*
* Keep it very simple for now: just lock out ksmd and
* MADV_UNMERGEABLE while any memory is going offline.
+ * mutex_lock_nested() is necessary because lockdep was alarmed
+ * that here we take ksm_thread_mutex inside notifier chain
+ * mutex, and later take notifier chain mutex inside
+ * ksm_thread_mutex to unlock it. But that's safe because both
+ * are inside mem_hotplug_mutex.
*/
- mutex_lock(&ksm_thread_mutex);
+ mutex_lock_nested(&ksm_thread_mutex, SINGLE_DEPTH_NESTING);
break;
case MEM_OFFLINE:
if (ksm_run != flags) {
ksm_run = flags;
if (flags & KSM_RUN_UNMERGE) {
- current->flags |= PF_OOM_ORIGIN;
+ int oom_score_adj;
+
+ oom_score_adj = test_set_oom_score_adj(OOM_SCORE_ADJ_MAX);
err = unmerge_and_remove_all_rmap_items();
- current->flags &= ~PF_OOM_ORIGIN;
+ compare_swap_oom_score_adj(OOM_SCORE_ADJ_MAX,
+ oom_score_adj);
if (err) {
ksm_run = KSM_RUN_STOP;
count = err;