mm: warn about VmData over RLIMIT_DATA
authorKonstantin Khlebnikov <[email protected]>
Wed, 3 Feb 2016 00:57:43 +0000 (16:57 -0800)
committerLinus Torvalds <[email protected]>
Wed, 3 Feb 2016 16:28:43 +0000 (08:28 -0800)
This patch provides a way of working around a slight regression
introduced by commit 84638335900f ("mm: rework virtual memory
accounting").

Before that commit RLIMIT_DATA have control only over size of the brk
region.  But that change have caused problems with all existing versions
of valgrind, because it set RLIMIT_DATA to zero.

This patch fixes rlimit check (limit actually in bytes, not pages) and
by default turns it into warning which prints at first VmData misuse:

  "mmap: top (795): VmData 516096 exceed data ulimit 512000.  Will be forbidden soon."

Behavior is controlled by boot param ignore_rlimit_data=y/n and by sysfs
/sys/module/kernel/parameters/ignore_rlimit_data.  For now it set to "y".

[[email protected]: tweak kernel-parameters.txt text[
Signed-off-by: Konstantin Khlebnikov <[email protected]>
Link: http://lkml.kernel.org/r/20151228211015.GL2194@uranus
Reported-by: Christian Borntraeger <[email protected]>
Cc: Cyrill Gorcunov <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Vegard Nossum <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Andy Lutomirski <[email protected]>
Cc: Quentin Casasnovas <[email protected]>
Cc: Kees Cook <[email protected]>
Cc: Willy Tarreau <[email protected]>
Cc: Pavel Emelyanov <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
Documentation/kernel-parameters.txt
mm/internal.h
mm/mmap.c

index 87d40a72f6a1bec998be718138347a875c00117a..551ecf09c8dd820be865ebbbc22fa6b5f608dd98 100644 (file)
@@ -1496,6 +1496,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
                        could change it dynamically, usually by
                        /sys/module/printk/parameters/ignore_loglevel.
 
+       ignore_rlimit_data
+                       Ignore RLIMIT_DATA setting for data mappings,
+                       print warning at first misuse.  Can be changed via
+                       /sys/module/kernel/parameters/ignore_rlimit_data.
+
        ihash_entries=  [KNL]
                        Set number of hash buckets for inode cache.
 
index ed8b5ffcf9b16fbfcf3ccba0d182957980ad45ab..6e976302ddd800870466d0cff0a0dafa98d15b59 100644 (file)
@@ -216,6 +216,22 @@ static inline bool is_cow_mapping(vm_flags_t flags)
        return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 }
 
+static inline bool is_exec_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_EXEC | VM_WRITE)) == VM_EXEC;
+}
+
+static inline bool is_stack_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN))) != 0;
+}
+
+static inline bool is_data_mapping(vm_flags_t flags)
+{
+       return (flags & ((VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)) |
+                                       VM_WRITE | VM_SHARED)) == VM_WRITE;
+}
+
 /* mm/util.c */
 void __vma_link_list(struct mm_struct *mm, struct vm_area_struct *vma,
                struct vm_area_struct *prev, struct rb_node *rb_parent);
index 84b12624ceb01d83762172634179825b086961fd..cfc0cdca421ec4c90449395a4e53ef7284aed308 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -42,6 +42,7 @@
 #include <linux/memory.h>
 #include <linux/printk.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/moduleparam.h>
 
 #include <asm/uaccess.h>
 #include <asm/cacheflush.h>
@@ -69,6 +70,8 @@ const int mmap_rnd_compat_bits_max = CONFIG_ARCH_MMAP_RND_COMPAT_BITS_MAX;
 int mmap_rnd_compat_bits __read_mostly = CONFIG_ARCH_MMAP_RND_COMPAT_BITS;
 #endif
 
+static bool ignore_rlimit_data = true;
+core_param(ignore_rlimit_data, ignore_rlimit_data, bool, 0644);
 
 static void unmap_region(struct mm_struct *mm,
                struct vm_area_struct *vma, struct vm_area_struct *prev,
@@ -2982,9 +2985,17 @@ bool may_expand_vm(struct mm_struct *mm, vm_flags_t flags, unsigned long npages)
        if (mm->total_vm + npages > rlimit(RLIMIT_AS) >> PAGE_SHIFT)
                return false;
 
-       if ((flags & (VM_WRITE | VM_SHARED | (VM_STACK_FLAGS &
-                               (VM_GROWSUP | VM_GROWSDOWN)))) == VM_WRITE)
-               return mm->data_vm + npages <= rlimit(RLIMIT_DATA);
+       if (is_data_mapping(flags) &&
+           mm->data_vm + npages > rlimit(RLIMIT_DATA) >> PAGE_SHIFT) {
+               if (ignore_rlimit_data)
+                       pr_warn_once("%s (%d): VmData %lu exceed data ulimit "
+                                    "%lu. Will be forbidden soon.\n",
+                                    current->comm, current->pid,
+                                    (mm->data_vm + npages) << PAGE_SHIFT,
+                                    rlimit(RLIMIT_DATA));
+               else
+                       return false;
+       }
 
        return true;
 }
@@ -2993,11 +3004,11 @@ void vm_stat_account(struct mm_struct *mm, vm_flags_t flags, long npages)
 {
        mm->total_vm += npages;
 
-       if ((flags & (VM_EXEC | VM_WRITE)) == VM_EXEC)
+       if (is_exec_mapping(flags))
                mm->exec_vm += npages;
-       else if (flags & (VM_STACK_FLAGS & (VM_GROWSUP | VM_GROWSDOWN)))
+       else if (is_stack_mapping(flags))
                mm->stack_vm += npages;
-       else if ((flags & (VM_WRITE | VM_SHARED)) == VM_WRITE)
+       else if (is_data_mapping(flags))
                mm->data_vm += npages;
 }