x86/entry/64: Initialize the top of the IRQ stack before switching stacks
author Andy Lutomirski <[email protected]>
Tue, 11 Jul 2017 15:33:39 +0000 (10:33 -0500)
committer Ingo Molnar <[email protected]>
Tue, 18 Jul 2017 08:56:23 +0000 (10:56 +0200)
The OOPS unwinder wants the word at the top of the IRQ stack to
point back to the previous stack at all times when the IRQ stack
is in use.  There's currently a one-instruction window in ENTER_IRQ_STACK
during which this isn't the case.  Fix it by writing the old RSP to the
top of the IRQ stack before jumping.

This currently writes the pointer to the stack twice, which is a bit
ugly.  We could get rid of this by replacing irq_stack_ptr with
irq_stack_ptr_minus_eight (better name welcome).  OTOH, there may be
all kinds of odd microarchitectural considerations in play that
affect performance by a few cycles here.

Reported-by: Mike Galbraith <[email protected]>
Reported-by: Josh Poimboeuf <[email protected]>
Signed-off-by: Andy Lutomirski <[email protected]>
Signed-off-by: Josh Poimboeuf <[email protected]>
Cc: Borislav Petkov <[email protected]>
Cc: Brian Gerst <[email protected]>
Cc: Denys Vlasenko <[email protected]>
Cc: H. Peter Anvin <[email protected]>
Cc: Jiri Slaby <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: [email protected]
Link: http://lkml.kernel.org/r/aae7e79e49914808440ad5310ace138ced2179ca.1499786555.git.jpoimboe@redhat.com
Signed-off-by: Ingo Molnar <[email protected]>
arch/x86/entry/entry_64.S

index 0d4483ae6360e69e0e9552b58bc1aeaa68ed677a..b56f7f23f21cc2634f762689aa01cdf1ebea7a30 100644 (file)
@@ -469,6 +469,7 @@ END(irq_entries_start)
        DEBUG_ENTRY_ASSERT_IRQS_OFF
        movq    %rsp, \old_rsp
        incl    PER_CPU_VAR(irq_count)
+       jnz     .Lirq_stack_push_old_rsp_\@
 
        /*
         * Right now, if we just incremented irq_count to zero, we've
@@ -478,9 +479,30 @@ END(irq_entries_start)
         * it must be *extremely* careful to limit its stack usage.  This
         * could include kprobes and a hypothetical future IST-less #DB
         * handler.
+        *
+        * The OOPS unwinder relies on the word at the top of the IRQ
+        * stack linking back to the previous RSP for the entire time we're
+        * on the IRQ stack.  For this to work reliably, we need to write
+        * it before we actually move ourselves to the IRQ stack.
+        */
+
+       movq    \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
+       movq    PER_CPU_VAR(irq_stack_ptr), %rsp
+
+#ifdef CONFIG_DEBUG_ENTRY
+       /*
+        * If the first movq above becomes wrong due to IRQ stack layout
+        * changes, the only way we'll notice is if we try to unwind right
+        * here.  Assert that we set up the stack right to catch this type
+        * of bug quickly.
         */
+       cmpq    -8(%rsp), \old_rsp
+       je      .Lirq_stack_okay\@
+       ud2
+       .Lirq_stack_okay\@:
+#endif
 
-       cmovzq  PER_CPU_VAR(irq_stack_ptr), %rsp
+.Lirq_stack_push_old_rsp_\@:
        pushq   \old_rsp
 .endm