mm: move lazily freed pages to inactive list
author Minchan Kim <[email protected]>
Sat, 16 Jan 2016 00:55:11 +0000 (16:55 -0800)
committer Linus Torvalds <[email protected]>
Sat, 16 Jan 2016 01:56:32 +0000 (17:56 -0800)
MADV_FREE is a hint that it's okay to discard pages if there is memory
pressure, and we use reclaimers (i.e., kswapd and direct reclaim) to free
them, so there is no value in keeping them on the active anonymous LRU.
This patch moves them to the head of the inactive LRU list.

This means that MADV_FREE-ed pages which were living on the inactive
list are reclaimed first because they are more likely to be cold rather
than recently active pages.

An arguable issue for the approach would be whether we should put the
page to the head or tail of the inactive list.  I chose head because the
kernel cannot make sure it's really cold or warm for every MADV_FREE
usecase but at least we know it's not *hot*, so landing at the inactive
head is a compromise for various usecases.

This fixes suboptimal behavior of MADV_FREE when pages living on the
active list will sit there for a long time even under memory pressure
while the inactive list is reclaimed heavily.  This basically breaks the
whole purpose of using MADV_FREE to help the system to free memory which
might not be used.

Signed-off-by: Minchan Kim <[email protected]>
Acked-by: Hugh Dickins <[email protected]>
Acked-by: Michal Hocko <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Shaohua Li <[email protected]>
Cc: "James E.J. Bottomley" <[email protected]>
Cc: "Kirill A. Shutemov" <[email protected]>
Cc: <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Arnd Bergmann <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Catalin Marinas <[email protected]>
Cc: Chen Gang <[email protected]>
Cc: Chris Zankel <[email protected]>
Cc: Daniel Micay <[email protected]>
Cc: Darrick J. Wong <[email protected]>
Cc: David S. Miller <[email protected]>
Cc: Helge Deller <[email protected]>
Cc: Ivan Kokshaysky <[email protected]>
Cc: Jason Evans <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Matt Turner <[email protected]>
Cc: Max Filippov <[email protected]>
Cc: Michael Kerrisk <[email protected]>
Cc: Mika Penttil <[email protected]>
Cc: Ralf Baechle <[email protected]>
Cc: Richard Henderson <[email protected]>
Cc: Roland Dreier <[email protected]>
Cc: Russell King <[email protected]>
Cc: Will Deacon <[email protected]>
Cc: Wu Fengguang <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
include/linux/swap.h
mm/madvise.c
mm/swap.c

index a282933c5bc6bd96e01919a68cc2e3f01846bb9a..414e101cd06195fe60339e71a8dfd2487001df7e 100644 (file)
@@ -307,6 +307,7 @@ extern void lru_add_drain_cpu(int cpu);
 extern void lru_add_drain_all(void);
 extern void rotate_reclaimable_page(struct page *page);
 extern void deactivate_file_page(struct page *page);
+extern void deactivate_page(struct page *page);
 extern void swap_setup(void);
 
 extern void add_page_to_unevictable_list(struct page *page);
index 98e28e777ccbb3fcf2985469addf2bf4b3cf864c..4e945462280149dc8407c216e9b8a82746d40bee 100644 (file)
@@ -368,6 +368,8 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr,
                        ptent = pte_mkold(ptent);
                        ptent = pte_mkclean(ptent);
                        set_pte_at(mm, addr, pte, ptent);
+                       if (PageActive(page))
+                               deactivate_page(page);
                        tlb_remove_tlb_entry(tlb, pte, addr);
                }
        }
index abffc33bb97509ac173de691cb34675ee419760b..674e2c93da4e958780abf8b4c6de11c8407ef25e 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -45,6 +45,7 @@ int page_cluster;
 static DEFINE_PER_CPU(struct pagevec, lru_add_pvec);
 static DEFINE_PER_CPU(struct pagevec, lru_rotate_pvecs);
 static DEFINE_PER_CPU(struct pagevec, lru_deactivate_file_pvecs);
+static DEFINE_PER_CPU(struct pagevec, lru_deactivate_pvecs);
 
 /*
  * This path almost never happens for VM activity - pages are normally
@@ -554,6 +555,24 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec,
        update_page_reclaim_stat(lruvec, file, 0);
 }
 
+/* Move an active, evictable LRU page to its inactive list; @arg is unused. */
+static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec,
+                           void *arg)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               int file = page_is_file_cache(page);
+               int lru = page_lru_base_type(page);
+
+               del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
+               ClearPageActive(page);
+               ClearPageReferenced(page);
+               add_page_to_lru_list(page, lruvec, lru);
+
+               __count_vm_event(PGDEACTIVATE);
+               update_page_reclaim_stat(lruvec, file, 0);
+       }
+}
+
 /*
  * Drain pages out of the cpu's pagevecs.
  * Either "cpu" is the current CPU, and preemption has already been
@@ -580,6 +599,10 @@ void lru_add_drain_cpu(int cpu)
        if (pagevec_count(pvec))
                pagevec_lru_move_fn(pvec, lru_deactivate_file_fn, NULL);
 
+       pvec = &per_cpu(lru_deactivate_pvecs, cpu);
+       if (pagevec_count(pvec))
+               pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+
        activate_page_drain(cpu);
 }
 
@@ -609,6 +632,26 @@ void deactivate_file_page(struct page *page)
        }
 }
 
+/**
+ * deactivate_page - deactivate a page
+ * @page: page to deactivate
+ *
+ * Queues @page, if it is on the active LRU and not unevictable, for a move to
+ * the inactive list to accelerate its reclaim.  The move itself is batched
+ * through the per-cpu lru_deactivate_pvecs and done by lru_deactivate_fn().
+ */
+void deactivate_page(struct page *page)
+{
+       if (PageLRU(page) && PageActive(page) && !PageUnevictable(page)) {
+               struct pagevec *pvec = &get_cpu_var(lru_deactivate_pvecs);
+
+               page_cache_get(page); /* presumably pins @page while queued */
+               if (!pagevec_add(pvec, page)) /* 0: presumably pvec is full */
+                       pagevec_lru_move_fn(pvec, lru_deactivate_fn, NULL);
+               put_cpu_var(lru_deactivate_pvecs);
+       }
+}
+
 void lru_add_drain(void)
 {
        lru_add_drain_cpu(get_cpu());
@@ -638,6 +681,7 @@ void lru_add_drain_all(void)
                if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
                    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
                    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
+                   pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
                    need_activate_page_drain(cpu)) {
                        INIT_WORK(work, lru_add_drain_per_cpu);
                        schedule_work_on(cpu, work);