vmscan: tracing: add trace events for kswapd wakeup, sleeping and direct reclaim
authorMel Gorman <[email protected]>
Tue, 10 Aug 2010 00:19:16 +0000 (17:19 -0700)
committerLinus Torvalds <[email protected]>
Tue, 10 Aug 2010 03:44:59 +0000 (20:44 -0700)
Add two trace events for kswapd waking up and going to sleep for the
purposes of tracking kswapd activity and two trace events for direct
reclaim beginning and ending.  The information can be used to work out how
much time a process or the system is spending on the reclamation of pages
and in the case of direct reclaim, how many pages were reclaimed for that
process.  High frequency triggering of these events could point to memory
pressure problems.

Signed-off-by: Mel Gorman <[email protected]>
Acked-by: Rik van Riel <[email protected]>
Acked-by: Larry Woodman <[email protected]>
Cc: Dave Chinner <[email protected]>
Cc: Chris Mason <[email protected]>
Cc: Nick Piggin <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Christoph Hellwig <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: Andrea Arcangeli <[email protected]>
Cc: Michael Rubin <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
include/trace/events/gfpflags.h [new file with mode: 0644]
include/trace/events/kmem.h
include/trace/events/vmscan.h [new file with mode: 0644]
mm/vmscan.c

diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h
new file mode 100644 (file)
index 0000000..e3615c0
--- /dev/null
+++ b/include/trace/events/gfpflags.h
@@ -0,0 +1,37 @@
+/*
+ * The order of these masks is important. Matching masks will be seen
+ * first and the left over flags will end up showing by themselves.
+ *
+ * For example, if we have GFP_KERNEL before GFP_USER we will get:
+ *
+ *  GFP_KERNEL|GFP_HARDWALL
+ *
+ * Thus most bits set go first.
+ */
+#define show_gfp_flags(flags)                                          \
+       (flags) ? __print_flags(flags, "|",                             \
+       {(unsigned long)GFP_HIGHUSER_MOVABLE,   "GFP_HIGHUSER_MOVABLE"}, \
+       {(unsigned long)GFP_HIGHUSER,           "GFP_HIGHUSER"},        \
+       {(unsigned long)GFP_USER,               "GFP_USER"},            \
+       {(unsigned long)GFP_TEMPORARY,          "GFP_TEMPORARY"},       \
+       {(unsigned long)GFP_KERNEL,             "GFP_KERNEL"},          \
+       {(unsigned long)GFP_NOFS,               "GFP_NOFS"},            \
+       {(unsigned long)GFP_ATOMIC,             "GFP_ATOMIC"},          \
+       {(unsigned long)GFP_NOIO,               "GFP_NOIO"},            \
+       {(unsigned long)__GFP_HIGH,             "GFP_HIGH"},            \
+       {(unsigned long)__GFP_WAIT,             "GFP_WAIT"},            \
+       {(unsigned long)__GFP_IO,               "GFP_IO"},              \
+       {(unsigned long)__GFP_COLD,             "GFP_COLD"},            \
+       {(unsigned long)__GFP_NOWARN,           "GFP_NOWARN"},          \
+       {(unsigned long)__GFP_REPEAT,           "GFP_REPEAT"},          \
+       {(unsigned long)__GFP_NOFAIL,           "GFP_NOFAIL"},          \
+       {(unsigned long)__GFP_NORETRY,          "GFP_NORETRY"},         \
+       {(unsigned long)__GFP_COMP,             "GFP_COMP"},            \
+       {(unsigned long)__GFP_ZERO,             "GFP_ZERO"},            \
+       {(unsigned long)__GFP_NOMEMALLOC,       "GFP_NOMEMALLOC"},      \
+       {(unsigned long)__GFP_HARDWALL,         "GFP_HARDWALL"},        \
+       {(unsigned long)__GFP_THISNODE,         "GFP_THISNODE"},        \
+       {(unsigned long)__GFP_RECLAIMABLE,      "GFP_RECLAIMABLE"},     \
+       {(unsigned long)__GFP_MOVABLE,          "GFP_MOVABLE"}          \
+       ) : "GFP_NOWAIT"
+
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 3adca0ca9dbee10479d34d5a3e3562609ef89e86..a9c87ad8331c61de25b9d2a51489de398a03d306 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -6,43 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/tracepoint.h>
-
-/*
- * The order of these masks is important. Matching masks will be seen
- * first and the left over flags will end up showing by themselves.
- *
- * For example, if we have GFP_KERNEL before GFP_USER we wil get:
- *
- *  GFP_KERNEL|GFP_HARDWALL
- *
- * Thus most bits set go first.
- */
-#define show_gfp_flags(flags)                                          \
-       (flags) ? __print_flags(flags, "|",                             \
-       {(unsigned long)GFP_HIGHUSER_MOVABLE,   "GFP_HIGHUSER_MOVABLE"}, \
-       {(unsigned long)GFP_HIGHUSER,           "GFP_HIGHUSER"},        \
-       {(unsigned long)GFP_USER,               "GFP_USER"},            \
-       {(unsigned long)GFP_TEMPORARY,          "GFP_TEMPORARY"},       \
-       {(unsigned long)GFP_KERNEL,             "GFP_KERNEL"},          \
-       {(unsigned long)GFP_NOFS,               "GFP_NOFS"},            \
-       {(unsigned long)GFP_ATOMIC,             "GFP_ATOMIC"},          \
-       {(unsigned long)GFP_NOIO,               "GFP_NOIO"},            \
-       {(unsigned long)__GFP_HIGH,             "GFP_HIGH"},            \
-       {(unsigned long)__GFP_WAIT,             "GFP_WAIT"},            \
-       {(unsigned long)__GFP_IO,               "GFP_IO"},              \
-       {(unsigned long)__GFP_COLD,             "GFP_COLD"},            \
-       {(unsigned long)__GFP_NOWARN,           "GFP_NOWARN"},          \
-       {(unsigned long)__GFP_REPEAT,           "GFP_REPEAT"},          \
-       {(unsigned long)__GFP_NOFAIL,           "GFP_NOFAIL"},          \
-       {(unsigned long)__GFP_NORETRY,          "GFP_NORETRY"},         \
-       {(unsigned long)__GFP_COMP,             "GFP_COMP"},            \
-       {(unsigned long)__GFP_ZERO,             "GFP_ZERO"},            \
-       {(unsigned long)__GFP_NOMEMALLOC,       "GFP_NOMEMALLOC"},      \
-       {(unsigned long)__GFP_HARDWALL,         "GFP_HARDWALL"},        \
-       {(unsigned long)__GFP_THISNODE,         "GFP_THISNODE"},        \
-       {(unsigned long)__GFP_RECLAIMABLE,      "GFP_RECLAIMABLE"},     \
-       {(unsigned long)__GFP_MOVABLE,          "GFP_MOVABLE"}          \
-       ) : "GFP_NOWAIT"
+#include "gfpflags.h"
 
 DECLARE_EVENT_CLASS(kmem_alloc,
 
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
new file mode 100644 (file)
index 0000000..f76521f
--- /dev/null
+++ b/include/trace/events/vmscan.h
@@ -0,0 +1,115 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM vmscan
+
+#if !defined(_TRACE_VMSCAN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_VMSCAN_H
+
+#include <linux/types.h>
+#include <linux/tracepoint.h>
+#include "gfpflags.h"
+
+TRACE_EVENT(mm_vmscan_kswapd_sleep,
+
+       TP_PROTO(int nid),
+
+       TP_ARGS(nid),
+
+       TP_STRUCT__entry(
+               __field(        int,    nid     )
+       ),
+
+       TP_fast_assign(
+               __entry->nid    = nid;
+       ),
+
+       TP_printk("nid=%d", __entry->nid)
+);
+
+TRACE_EVENT(mm_vmscan_kswapd_wake,
+
+       TP_PROTO(int nid, int order),
+
+       TP_ARGS(nid, order),
+
+       TP_STRUCT__entry(
+               __field(        int,    nid     )
+               __field(        int,    order   )
+       ),
+
+       TP_fast_assign(
+               __entry->nid    = nid;
+               __entry->order  = order;
+       ),
+
+       TP_printk("nid=%d order=%d", __entry->nid, __entry->order)
+);
+
+TRACE_EVENT(mm_vmscan_wakeup_kswapd,
+
+       TP_PROTO(int nid, int zid, int order),
+
+       TP_ARGS(nid, zid, order),
+
+       TP_STRUCT__entry(
+               __field(        int,            nid     )
+               __field(        int,            zid     )
+               __field(        int,            order   )
+       ),
+
+       TP_fast_assign(
+               __entry->nid            = nid;
+               __entry->zid            = zid;
+               __entry->order          = order;
+       ),
+
+       TP_printk("nid=%d zid=%d order=%d",
+               __entry->nid,
+               __entry->zid,
+               __entry->order)
+);
+
+TRACE_EVENT(mm_vmscan_direct_reclaim_begin,
+
+       TP_PROTO(int order, int may_writepage, gfp_t gfp_flags),
+
+       TP_ARGS(order, may_writepage, gfp_flags),
+
+       TP_STRUCT__entry(
+               __field(        int,    order           )
+               __field(        int,    may_writepage   )
+               __field(        gfp_t,  gfp_flags       )
+       ),
+
+       TP_fast_assign(
+               __entry->order          = order;
+               __entry->may_writepage  = may_writepage;
+               __entry->gfp_flags      = gfp_flags;
+       ),
+
+       TP_printk("order=%d may_writepage=%d gfp_flags=%s",
+               __entry->order,
+               __entry->may_writepage,
+               show_gfp_flags(__entry->gfp_flags))
+);
+
+TRACE_EVENT(mm_vmscan_direct_reclaim_end,
+
+       TP_PROTO(unsigned long nr_reclaimed),
+
+       TP_ARGS(nr_reclaimed),
+
+       TP_STRUCT__entry(
+               __field(        unsigned long,  nr_reclaimed    )
+       ),
+
+       TP_fast_assign(
+               __entry->nr_reclaimed   = nr_reclaimed;
+       ),
+
+       TP_printk("nr_reclaimed=%lu", __entry->nr_reclaimed)
+);
+
+#endif /* _TRACE_VMSCAN_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 6dafa45d79e4cd64e6c81ecd024c60a9dea0d258..c99bc418c4cf033559fa4fe274a10cdd2d8ea6c3 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -48,6 +48,9 @@
 
 #include "internal.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/vmscan.h>
+
 struct scan_control {
        /* Incremented by the number of inactive pages that were scanned */
        unsigned long nr_scanned;
@@ -1883,6 +1886,7 @@ out:
 unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                gfp_t gfp_mask, nodemask_t *nodemask)
 {
+       unsigned long nr_reclaimed;
        struct scan_control sc = {
                .gfp_mask = gfp_mask,
                .may_writepage = !laptop_mode,
@@ -1895,7 +1899,15 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                .nodemask = nodemask,
        };
 
-       return do_try_to_free_pages(zonelist, &sc);
+       trace_mm_vmscan_direct_reclaim_begin(order,
+                               sc.may_writepage,
+                               gfp_mask);
+
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+       trace_mm_vmscan_direct_reclaim_end(nr_reclaimed);
+
+       return nr_reclaimed;
 }
 
 #ifdef CONFIG_CGROUP_MEM_RES_CTLR
@@ -2294,9 +2306,10 @@ static int kswapd(void *p)
                                 * premature sleep. If not, then go fully
                                 * to sleep until explicitly woken up
                                 */
-                               if (!sleeping_prematurely(pgdat, order, remaining))
+                               if (!sleeping_prematurely(pgdat, order, remaining)) {
+                                       trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
                                        schedule();
-                               else {
+                               } else {
                                        if (remaining)
                                                count_vm_event(KSWAPD_LOW_WMARK_HIT_QUICKLY);
                                        else
@@ -2316,8 +2329,10 @@ static int kswapd(void *p)
                 * We can speed up thawing tasks if we don't call balance_pgdat
                 * after returning from the refrigerator
                 */
-               if (!ret)
+               if (!ret) {
+                       trace_mm_vmscan_kswapd_wake(pgdat->node_id, order);
                        balance_pgdat(pgdat, order);
+               }
        }
        return 0;
 }
@@ -2337,6 +2352,7 @@ void wakeup_kswapd(struct zone *zone, int order)
                return;
        if (pgdat->kswapd_max_order < order)
                pgdat->kswapd_max_order = order;
+       trace_mm_vmscan_wakeup_kswapd(pgdat->node_id, zone_idx(zone), order);
        if (!cpuset_zone_allowed_hardwall(zone, GFP_KERNEL))
                return;
        if (!waitqueue_active(&pgdat->kswapd_wait))