From 4687518c4cb7807fbeff21770e309080f9eb7f2f Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 11 Nov 2008 13:03:07 -0800 Subject: [PATCH 01/26] x86: 32 bit: interrupt stub consistency with 64 bit Don't generate interrupt stubs for interrupt vectors below FIRST_EXTERNAL_VECTOR, and make the table of interrupt vectors (interrupt[]) __initconst. Both of these changes both conserve memory and improve consistency with 64 bits. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hw_irq.h | 2 +- arch/x86/kernel/entry_32.S | 6 +++--- arch/x86/kernel/irqinit_32.c | 2 +- arch/x86/lguest/boot.c | 3 ++- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index b97aecb0b61..27d33f92afe 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -110,7 +110,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); #endif #ifdef CONFIG_X86_32 -extern void (*const interrupt[NR_VECTORS])(void); +extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); #endif typedef int vector_irq_t[NR_VECTORS]; diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 28b597ef9ca..4aea95652cf 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -622,16 +622,16 @@ END(syscall_badsys) * Build the entry stubs and pointer table with * some assembler magic. */ -.section .rodata,"a" +.section .init.rodata,"a" ENTRY(interrupt) .text ENTRY(irq_entries_start) RING0_INT_FRAME -vector=0 +vector=FIRST_EXTERNAL_VECTOR .rept NR_VECTORS ALIGN - .if vector + .if vector != FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -4 .endif 1: pushl $~(vector) diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c index 845aa9803e8..607db63044a 100644 --- a/arch/x86/kernel/irqinit_32.c +++ b/arch/x86/kernel/irqinit_32.c @@ -129,7 +129,7 @@ void __init native_init_IRQ(void) for (i = FIRST_EXTERNAL_VECTOR; i < NR_VECTORS; i++) { /* SYSCALL_VECTOR was reserved in trap_init. */ if (i != SYSCALL_VECTOR) - set_intr_gate(i, interrupt[i]); + set_intr_gate(i, interrupt[i-FIRST_EXTERNAL_VECTOR]); } diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index a5d8e1ace1c..50a779264bb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -590,7 +590,8 @@ static void __init lguest_init_IRQ(void) * a straightforward 1 to 1 mapping, so force that here. */ __get_cpu_var(vector_irq)[vector] = i; if (vector != SYSCALL_VECTOR) { - set_intr_gate(vector, interrupt[vector]); + set_intr_gate(vector, + interrupt[vector-FIRST_EXTERNAL_VECTOR]); set_irq_chip_and_handler_name(i, &lguest_irq_controller, handle_level_irq, "level"); From b7c6244f13d37592003b46e12500a90e9781ad9d Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 11 Nov 2008 13:24:58 -0800 Subject: [PATCH 02/26] x86: 32 bits: shrink and align IRQ stubs Shrink the IRQ stubs on 32 bits down to just over four bytes per (we fit seven into a 32-byte chunk.) This shrinks the total icache consumption of the IRQ stubs down to an even kilobyte, if all of them are in active use. The downside is that we end up with a double jump, which could have a negative effect on some pipelines. The double jump is always inside the same cacheline on any modern chips (the exception being 486/Elan/Geode which have only 16-byte cachelines, but are unlikely to have too many interrupt sources.) To get the most effect, cache-align the IRQ stubs. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 34 ++++++++++++++++++++++------------ 1 file changed, 22 insertions(+), 12 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 4aea95652cf..dae81b9fd45 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -619,28 +619,37 @@ END(syscall_badsys) 27:; /* - * Build the entry stubs and pointer table with - * some assembler magic. + * Build the entry stubs and pointer table with some assembler magic. + * We pack 7 stubs into a single 32-byte chunk, which will fit in a + * single cache line on all modern x86 implementations. */ .section .init.rodata,"a" ENTRY(interrupt) .text - + .p2align 5 + .p2align CONFIG_X86_L1_CACHE_SHIFT ENTRY(irq_entries_start) RING0_INT_FRAME vector=FIRST_EXTERNAL_VECTOR -.rept NR_VECTORS - ALIGN - .if vector != FIRST_EXTERNAL_VECTOR +.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 + .balign 32 + .rept 7 + .if vector < NR_VECTORS + .if vector != FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -4 - .endif -1: pushl $~(vector) + .endif +1: pushl $(~vector+0x80) /* Note: always in signed byte range */ CFI_ADJUST_CFA_OFFSET 4 - jmp common_interrupt - .previous + .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6 + jmp 2f + .endif + .previous .long 1b - .text + .text vector=vector+1 + .endif + .endr +2: jmp common_interrupt .endr END(irq_entries_start) @@ -652,8 +661,9 @@ END(interrupt) * the CPU automatically disables interrupts when executing an IRQ vector, * so IRQ-flags tracing has to follow that: */ - ALIGN + .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: + addl $-0x80,(%esp) /* Adjust vector into the [-256,-1] range */ SAVE_ALL TRACE_IRQS_OFF movl %esp,%eax From 939b787130bf22887a09d8fd2641a094dcef8c22 Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Tue, 11 Nov 2008 13:51:52 -0800 Subject: [PATCH 03/26] x86: 64 bits: shrink and align IRQ stubs Move the IRQ stub generation to assembly to simplify it and for consistency with 32 bits. Doing it in a C file with asm() statements doesn't help clarity, and it prevents some optimizations. Shrink the IRQ stubs down to just over four bytes per (we fit seven into a 32-byte chunk.) This shrinks the total icache consumption of the IRQ stubs down to an even kilobyte, if all of them are in active use. The downside is that we end up with a double jump, which could have a negative effect on some pipelines. The double jump is always inside the same cacheline on any modern chips. To get the most effect, cache-align the IRQ stubs. This makes the 64-bit code match changes already done to the 32-bit code, and should open up irqinit*.c for unification. Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/hw_irq.h | 2 -- arch/x86/kernel/entry_64.S | 48 +++++++++++++++++++++++-- arch/x86/kernel/irqinit_64.c | 66 ----------------------------------- 3 files changed, 45 insertions(+), 71 deletions(-) diff --git a/arch/x86/include/asm/hw_irq.h b/arch/x86/include/asm/hw_irq.h index 27d33f92afe..8de644b6b95 100644 --- a/arch/x86/include/asm/hw_irq.h +++ b/arch/x86/include/asm/hw_irq.h @@ -109,9 +109,7 @@ extern asmlinkage void smp_invalidate_interrupt(struct pt_regs *); #endif #endif -#ifdef CONFIG_X86_32 extern void (*__initconst interrupt[NR_VECTORS-FIRST_EXTERNAL_VECTOR])(void); -#endif typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index b86f332c96a..9b2aeaac9a6 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -627,6 +627,46 @@ END(stub_rt_sigreturn) vector already pushed) */ #define XCPT_FRAME _frame ORIG_RAX +/* + * Build the entry stubs and pointer table with some assembler magic. + * We pack 7 stubs into a single 32-byte chunk, which will fit in a + * single cache line on all modern x86 implementations. + */ + .section .init.rodata,"a" +ENTRY(interrupt) + .text + .p2align 5 + .p2align CONFIG_X86_L1_CACHE_SHIFT +ENTRY(irq_entries_start) + INTR_FRAME +vector=FIRST_EXTERNAL_VECTOR +.rept (NR_VECTORS-FIRST_EXTERNAL_VECTOR+6)/7 + .balign 32 + .rept 7 + .if vector < NR_VECTORS + .if vector != FIRST_EXTERNAL_VECTOR + CFI_ADJUST_CFA_OFFSET -8 + .endif +1: pushq $(~vector+0x80) /* Note: always in signed byte range */ + CFI_ADJUST_CFA_OFFSET 8 + .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6 + jmp 2f + .endif + .previous + .quad 1b + .text +vector=vector+1 + .endif + .endr +2: jmp common_interrupt +.endr + CFI_ENDPROC +END(irq_entries_start) + +.previous +END(interrupt) +.previous + /* * Interrupt entry/exit. * @@ -635,11 +675,12 @@ END(stub_rt_sigreturn) * Entry runs with interrupts off. */ -/* 0(%rsp): interrupt number */ +/* 0(%rsp): ~(interrupt number)+0x80 */ .macro interrupt func + addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ cld SAVE_ARGS - leaq -ARGOFFSET(%rsp),%rdi # arg1 for handler + leaq -ARGOFFSET(%rsp),%rdi /* arg1 for handler */ pushq %rbp /* * Save rbp twice: One is for marking the stack frame, as usual, and the @@ -670,7 +711,8 @@ END(stub_rt_sigreturn) call \func .endm -ENTRY(common_interrupt) + .p2align CONFIG_X86_L1_CACHE_SHIFT +common_interrupt: XCPT_FRAME interrupt do_IRQ /* 0(%rsp): oldrsp-ARGOFFSET */ diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c index ff023539128..8670b3ce626 100644 --- a/arch/x86/kernel/irqinit_64.c +++ b/arch/x86/kernel/irqinit_64.c @@ -23,41 +23,6 @@ #include #include -/* - * Common place to define all x86 IRQ vectors - * - * This builds up the IRQ handler stubs using some ugly macros in irq.h - * - * These macros create the low-level assembly IRQ routines that save - * register context and call do_IRQ(). do_IRQ() then does all the - * operations that are needed to keep the AT (or SMP IOAPIC) - * interrupt-controller happy. - */ - -#define IRQ_NAME2(nr) nr##_interrupt(void) -#define IRQ_NAME(nr) IRQ_NAME2(IRQ##nr) - -/* - * SMP has a few special interrupts for IPI messages - */ - -#define BUILD_IRQ(nr) \ - asmlinkage void IRQ_NAME(nr); \ - asm("\n.text\n.p2align\n" \ - "IRQ" #nr "_interrupt:\n\t" \ - "push $~(" #nr ") ; " \ - "jmp common_interrupt\n" \ - ".previous"); - -#define BI(x,y) \ - BUILD_IRQ(x##y) - -#define BUILD_16_IRQS(x) \ - BI(x,0) BI(x,1) BI(x,2) BI(x,3) \ - BI(x,4) BI(x,5) BI(x,6) BI(x,7) \ - BI(x,8) BI(x,9) BI(x,a) BI(x,b) \ - BI(x,c) BI(x,d) BI(x,e) BI(x,f) - /* * ISA PIC or low IO-APIC triggered (INTA-cycle or APIC) interrupts: * (these are usually mapped to vectors 0x30-0x3f) @@ -73,37 +38,6 @@ * * (these are usually mapped into the 0x30-0xff vector range) */ - BUILD_16_IRQS(0x2) BUILD_16_IRQS(0x3) -BUILD_16_IRQS(0x4) BUILD_16_IRQS(0x5) BUILD_16_IRQS(0x6) BUILD_16_IRQS(0x7) -BUILD_16_IRQS(0x8) BUILD_16_IRQS(0x9) BUILD_16_IRQS(0xa) BUILD_16_IRQS(0xb) -BUILD_16_IRQS(0xc) BUILD_16_IRQS(0xd) BUILD_16_IRQS(0xe) BUILD_16_IRQS(0xf) - -#undef BUILD_16_IRQS -#undef BI - - -#define IRQ(x,y) \ - IRQ##x##y##_interrupt - -#define IRQLIST_16(x) \ - IRQ(x,0), IRQ(x,1), IRQ(x,2), IRQ(x,3), \ - IRQ(x,4), IRQ(x,5), IRQ(x,6), IRQ(x,7), \ - IRQ(x,8), IRQ(x,9), IRQ(x,a), IRQ(x,b), \ - IRQ(x,c), IRQ(x,d), IRQ(x,e), IRQ(x,f) - -/* for the irq vectors */ -static void (*__initdata interrupt[NR_VECTORS - FIRST_EXTERNAL_VECTOR])(void) = { - IRQLIST_16(0x2), IRQLIST_16(0x3), - IRQLIST_16(0x4), IRQLIST_16(0x5), IRQLIST_16(0x6), IRQLIST_16(0x7), - IRQLIST_16(0x8), IRQLIST_16(0x9), IRQLIST_16(0xa), IRQLIST_16(0xb), - IRQLIST_16(0xc), IRQLIST_16(0xd), IRQLIST_16(0xe), IRQLIST_16(0xf) -}; - -#undef IRQ -#undef IRQLIST_16 - - - /* * IRQ2 is cascade interrupt to second interrupt controller From 8665596ec05498525014436520b316ba174a068a Mon Sep 17 00:00:00 2001 From: "H. Peter Anvin" Date: Wed, 12 Nov 2008 10:27:35 -0800 Subject: [PATCH 04/26] x86: fix up the new IRQ code for older versions of gas Older versions of gas don't implement the C-style != operator, they instead want the Pascal-style <> operator. Change != to <> so we don't break compilation with those old versions of gas. Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_32.S | 4 ++-- arch/x86/kernel/entry_64.S | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index dae81b9fd45..bd02ec77edc 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -635,12 +635,12 @@ vector=FIRST_EXTERNAL_VECTOR .balign 32 .rept 7 .if vector < NR_VECTORS - .if vector != FIRST_EXTERNAL_VECTOR + .if vector <> FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -4 .endif 1: pushl $(~vector+0x80) /* Note: always in signed byte range */ CFI_ADJUST_CFA_OFFSET 4 - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6 + .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 jmp 2f .endif .previous diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 9b2aeaac9a6..2b42362a85b 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -644,12 +644,12 @@ vector=FIRST_EXTERNAL_VECTOR .balign 32 .rept 7 .if vector < NR_VECTORS - .if vector != FIRST_EXTERNAL_VECTOR + .if vector <> FIRST_EXTERNAL_VECTOR CFI_ADJUST_CFA_OFFSET -8 .endif 1: pushq $(~vector+0x80) /* Note: always in signed byte range */ CFI_ADJUST_CFA_OFFSET 8 - .if ((vector-FIRST_EXTERNAL_VECTOR)%7) != 6 + .if ((vector-FIRST_EXTERNAL_VECTOR)%7) <> 6 jmp 2f .endif .previous From 722024dbb74f3ea316c285c0a71a4512e113b0c4 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Thu, 13 Nov 2008 13:50:20 +0100 Subject: [PATCH 05/26] x86: irq: fix apicinterrupts on 64 bits Impact: Fix interrupt via the apicinterrupt macro Checkin 939b787130bf22887a09d8fd2641a094dcef8c22 changed the "interrupt" macro, but the "interrupt" macro is also invoked indirectly from the "apicinterrupt" macro. The "apicinterrupt" macro probably should have its own collection of systematic stubs for the same reason the main IRQ code does; as is it is a huge amount of replicated code. Signed-off-by: Alexander van Heukelum Signed-off-by: H. Peter Anvin --- arch/x86/kernel/entry_64.S | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 2b42362a85b..369de6973c5 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -675,9 +675,8 @@ END(interrupt) * Entry runs with interrupts off. */ -/* 0(%rsp): ~(interrupt number)+0x80 */ +/* 0(%rsp): ~(interrupt number) */ .macro interrupt func - addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ cld SAVE_ARGS leaq -ARGOFFSET(%rsp),%rdi /* arg1 for handler */ @@ -711,9 +710,14 @@ END(interrupt) call \func .endm + /* + * The interrupt stubs push (~vector+0x80) onto the stack and + * then jump to common_interrupt. + */ .p2align CONFIG_X86_L1_CACHE_SHIFT common_interrupt: XCPT_FRAME + addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ interrupt do_IRQ /* 0(%rsp): oldrsp-ARGOFFSET */ ret_from_intr: From d99015b1abbad743aa049b439c1e1dede6d0fa49 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Wed, 19 Nov 2008 01:18:11 +0100 Subject: [PATCH 06/26] x86: move entry_64.S register saving out of the macros Here is a combined patch that moves "save_args" out-of-line for the interrupt macro and moves "error_entry" mostly out-of-line for the zeroentry and errorentry macros. The save_args function becomes really straightforward and easy to understand, with the possible exception of the stack switch code, which now needs to copy the return address of to the calling function. Normal interrupts arrive with ((~vector)-0x80) on the stack, which gets adjusted in common_interrupt: : (5) addq $0xffffffffffffff80,(%rsp) /* -> ~(vector) */ (4) sub $0x50,%rsp /* space for registers */ (5) callq ffffffff80211290 (5) callq ffffffff80214290 : ... An apic interrupt stub now look like this: : (5) pushq $0xffffffffffffff05 /* ~(vector) */ (4) sub $0x50,%rsp /* space for registers */ (5) callq ffffffff80211290 (5) callq ffffffff80212b8f (5) jmpq ffffffff80211f93 Similarly the exception handler register saving function becomes simpler, without the need of any parameter shuffling. The stub for an exception without errorcode looks like this: : (6) callq *0x1cad12(%rip) # ffffffff803dd448 (2) pushq $0xffffffffffffffff /* no syscall */ (4) sub $0x78,%rsp /* space for registers */ (5) callq ffffffff8030e3b0 (3) mov %rsp,%rdi /* pt_regs pointer */ (2) xor %esi,%esi /* no error code */ (5) callq ffffffff80213446 (5) jmpq ffffffff8030e460 And one for an exception with errorcode like this: : (6) callq *0x1cab92(%rip) # ffffffff803dd448 (4) sub $0x78,%rsp /* space for registers */ (5) callq ffffffff8030e3b0 (3) mov %rsp,%rdi /* pt_regs pointer */ (5) mov 0x78(%rsp),%rsi /* load error code */ (9) movq $0xffffffffffffffff,0x78(%rsp) /* no syscall */ (5) callq ffffffff80213209 (5) jmpq ffffffff8030e460 Unfortunately, this last type is more than 32 bytes. But the total space savings due to this patch is about 2500 bytes on an smp-configuration, and I think the code is clearer than it was before. The tested kernels were non-paravirt ones (i.e., without the indirect call at the top of the exception handlers). Anyhow, I tested this patch on top of a recent -tip. The machine was an 2x4-core Xeon at 2333MHz. Measured where the delays between (almost-)adjacent rdtsc instructions. The graphs show how much time is spent outside of the program as a function of the measured delay. The area under the graph represents the total time spent outside the program. Eight instances of the rdtsctest were started, each pinned to a single cpu. The histogams are added. For each kernel two measurements were done: one in mostly idle condition, the other while running "bonnie++ -f", bound to cpu 0. Each measurement took 40 minutes runtime. See the attached graphs for the results. The graphs overlap almost everywhere, but there are small differences. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 300 ++++++++++++++++++++----------------- 1 file changed, 166 insertions(+), 134 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index dbf06a0ef3d..5a12432ccdf 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -242,6 +242,78 @@ ENTRY(native_usergs_sysret64) CFI_REL_OFFSET rsp,RSP /*CFI_REL_OFFSET ss,SS*/ .endm + +/* + * initial frame state for interrupts and exceptions + */ + .macro _frame ref + CFI_STARTPROC simple + CFI_SIGNAL_FRAME + CFI_DEF_CFA rsp,SS+8-\ref + /*CFI_REL_OFFSET ss,SS-\ref*/ + CFI_REL_OFFSET rsp,RSP-\ref + /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ + /*CFI_REL_OFFSET cs,CS-\ref*/ + CFI_REL_OFFSET rip,RIP-\ref + .endm + +/* + * initial frame state for interrupts (and exceptions without error code) + */ +#define INTR_FRAME _frame RIP +/* + * initial frame state for exceptions with error code (and interrupts + * with vector already pushed) + */ +#define XCPT_FRAME _frame ORIG_RAX + +/* save partial stack frame */ +ENTRY(save_args) + XCPT_FRAME + cld + movq %rdi, 8*8+16(%rsp) + CFI_REL_OFFSET rdi, 8*8+16 + movq %rsi, 7*8+16(%rsp) + CFI_REL_OFFSET rsi, 7*8+16 + movq %rdx, 6*8+16(%rsp) + CFI_REL_OFFSET rdx, 6*8+16 + movq %rcx, 5*8+16(%rsp) + CFI_REL_OFFSET rcx, 5*8+16 + movq %rax, 4*8+16(%rsp) + CFI_REL_OFFSET rax, 4*8+16 + movq %r8, 3*8+16(%rsp) + CFI_REL_OFFSET r8, 3*8+16 + movq %r9, 2*8+16(%rsp) + CFI_REL_OFFSET r9, 2*8+16 + movq %r10, 1*8+16(%rsp) + CFI_REL_OFFSET r10, 1*8+16 + movq %r11, 0*8+16(%rsp) + CFI_REL_OFFSET r11, 0*8+16 + leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ + movq %rbp, 8(%rsp) /* push %rbp */ + leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ + testl $3, CS(%rdi) + je 1f + SWAPGS + /* + * irqcount is used to check if a CPU is already on an interrupt stack + * or not. While this is essentially redundant with preempt_count it is + * a little cheaper to use a separate counter in the PDA (short of + * moving irq_enter into assembly, which would be too much work) + */ +1: incl %gs:pda_irqcount + jne 2f + pop %rax /* move return address... */ + mov %gs:pda_irqstackptr,%rsp + push %rax /* ... to the new stack */ + /* + * We entered an interrupt context - irqs are off: + */ +2: TRACE_IRQS_OFF + ret + CFI_ENDPROC +END(save_args) + /* * A newly forked process directly context switches into this. */ @@ -607,26 +679,6 @@ ENTRY(stub_rt_sigreturn) CFI_ENDPROC END(stub_rt_sigreturn) -/* - * initial frame state for interrupts and exceptions - */ - .macro _frame ref - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-\ref - /*CFI_REL_OFFSET ss,SS-\ref*/ - CFI_REL_OFFSET rsp,RSP-\ref - /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ - /*CFI_REL_OFFSET cs,CS-\ref*/ - CFI_REL_OFFSET rip,RIP-\ref - .endm - -/* initial frame state for interrupts (and exceptions without error code) */ -#define INTR_FRAME _frame RIP -/* initial frame state for exceptions with error code (and interrupts with - vector already pushed) */ -#define XCPT_FRAME _frame ORIG_RAX - /* * Build the entry stubs and pointer table with some assembler magic. * We pack 7 stubs into a single 32-byte chunk, which will fit in a @@ -667,46 +719,19 @@ END(irq_entries_start) END(interrupt) .previous -/* +/* * Interrupt entry/exit. * * Interrupt entry points save only callee clobbered registers in fast path. - * - * Entry runs with interrupts off. - */ + * + * Entry runs with interrupts off. + */ /* 0(%rsp): ~(interrupt number) */ .macro interrupt func - cld - SAVE_ARGS - leaq -ARGOFFSET(%rsp),%rdi /* arg1 for handler */ - pushq %rbp - /* - * Save rbp twice: One is for marking the stack frame, as usual, and the - * other, to fill pt_regs properly. This is because bx comes right - * before the last saved register in that structure, and not bp. If the - * base pointer were in the place bx is today, this would not be needed. - */ - movq %rbp, -8(%rsp) - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rbp, 0 - movq %rsp,%rbp - CFI_DEF_CFA_REGISTER rbp - testl $3,CS(%rdi) - je 1f - SWAPGS - /* irqcount is used to check if a CPU is already on an interrupt - stack or not. While this is essentially redundant with preempt_count - it is a little cheaper to use a separate counter in the PDA - (short of moving irq_enter into assembly, which would be too - much work) */ -1: incl %gs:pda_irqcount - cmoveq %gs:pda_irqstackptr,%rsp - push %rbp # backlink for old unwinder - /* - * We entered an interrupt context - irqs are off: - */ - TRACE_IRQS_OFF + subq $10*8, %rsp + CFI_ADJUST_CFA_OFFSET 10*8 + call save_args call \func .endm @@ -852,6 +877,8 @@ END(common_interrupt) /* * APIC interrupts. */ + .p2align 5 + .macro apicinterrupt num,func INTR_FRAME pushq $~(\num) @@ -922,24 +949,29 @@ END(spurious_interrupt) .macro zeroentry sym INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $0 /* push error code/oldrax */ + pushq $-1 /* ORIG_RAX: no syscall to restart */ CFI_ADJUST_CFA_OFFSET 8 - pushq %rax /* push real oldrax to the rdi slot */ - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rax,0 - leaq \sym(%rip),%rax - jmp error_entry + subq $15*8,%rsp + CFI_ADJUST_CFA_OFFSET 15*8 + call error_entry + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ + call \sym + jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC .endm .macro errorentry sym XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq %rax - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rax,0 - leaq \sym(%rip),%rax - jmp error_entry + subq $15*8,%rsp + CFI_ADJUST_CFA_OFFSET 15*8 + call error_entry + movq %rsp,%rdi /* pt_regs pointer */ + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ + call \sym + jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC .endm @@ -1043,93 +1075,93 @@ paranoid_schedule\trace: .endm /* - * Exception entry point. This expects an error code/orig_rax on the stack - * and the exception handler in %rax. + * Exception entry point. This expects an error code/orig_rax on the stack. + * returns in "no swapgs flag" in %ebx. */ KPROBE_ENTRY(error_entry) _frame RDI - CFI_REL_OFFSET rax,0 - /* rdi slot contains rax, oldrax contains error code */ + CFI_ADJUST_CFA_OFFSET 15*8 + /* oldrax contains error code */ cld - subq $14*8,%rsp - CFI_ADJUST_CFA_OFFSET (14*8) - movq %rsi,13*8(%rsp) - CFI_REL_OFFSET rsi,RSI - movq 14*8(%rsp),%rsi /* load rax from rdi slot */ - CFI_REGISTER rax,rsi - movq %rdx,12*8(%rsp) - CFI_REL_OFFSET rdx,RDX - movq %rcx,11*8(%rsp) - CFI_REL_OFFSET rcx,RCX - movq %rsi,10*8(%rsp) /* store rax */ - CFI_REL_OFFSET rax,RAX - movq %r8, 9*8(%rsp) - CFI_REL_OFFSET r8,R8 - movq %r9, 8*8(%rsp) - CFI_REL_OFFSET r9,R9 - movq %r10,7*8(%rsp) - CFI_REL_OFFSET r10,R10 - movq %r11,6*8(%rsp) - CFI_REL_OFFSET r11,R11 - movq %rbx,5*8(%rsp) - CFI_REL_OFFSET rbx,RBX - movq %rbp,4*8(%rsp) - CFI_REL_OFFSET rbp,RBP - movq %r12,3*8(%rsp) - CFI_REL_OFFSET r12,R12 - movq %r13,2*8(%rsp) - CFI_REL_OFFSET r13,R13 - movq %r14,1*8(%rsp) - CFI_REL_OFFSET r14,R14 - movq %r15,(%rsp) - CFI_REL_OFFSET r15,R15 + movq %rdi,14*8+8(%rsp) + CFI_REL_OFFSET rdi,RDI+8 + movq %rsi,13*8+8(%rsp) + CFI_REL_OFFSET rsi,RSI+8 + movq %rdx,12*8+8(%rsp) + CFI_REL_OFFSET rdx,RDX+8 + movq %rcx,11*8+8(%rsp) + CFI_REL_OFFSET rcx,RCX+8 + movq %rax,10*8+8(%rsp) + CFI_REL_OFFSET rax,RAX+8 + movq %r8, 9*8+8(%rsp) + CFI_REL_OFFSET r8,R8+8 + movq %r9, 8*8+8(%rsp) + CFI_REL_OFFSET r9,R9+8 + movq %r10,7*8+8(%rsp) + CFI_REL_OFFSET r10,R10+8 + movq %r11,6*8+8(%rsp) + CFI_REL_OFFSET r11,R11+8 + movq %rbx,5*8+8(%rsp) + CFI_REL_OFFSET rbx,RBX+8 + movq %rbp,4*8+8(%rsp) + CFI_REL_OFFSET rbp,RBP+8 + movq %r12,3*8+8(%rsp) + CFI_REL_OFFSET r12,R12+8 + movq %r13,2*8+8(%rsp) + CFI_REL_OFFSET r13,R13+8 + movq %r14,1*8+8(%rsp) + CFI_REL_OFFSET r14,R14+8 + movq %r15,0*8+8(%rsp) + CFI_REL_OFFSET r15,R15+8 xorl %ebx,%ebx - testl $3,CS(%rsp) - je error_kernelspace + testl $3,CS+8(%rsp) + je error_kernelspace error_swapgs: SWAPGS error_sti: TRACE_IRQS_OFF - movq %rdi,RDI(%rsp) - CFI_REL_OFFSET rdi,RDI - movq %rsp,%rdi - movq ORIG_RAX(%rsp),%rsi /* get error code */ - movq $-1,ORIG_RAX(%rsp) - call *%rax - /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -error_exit: + ret + CFI_ENDPROC + +/* + * There are two places in the kernel that can potentially fault with + * usergs. Handle them here. The exception handlers after iret run with + * kernel gs again, so don't set the user space flag. B stepping K8s + * sometimes report an truncated RIP for IRET exceptions returning to + * compat mode. Check for these here too. + */ +error_kernelspace: + incl %ebx + leaq irq_return(%rip),%rcx + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + movl %ecx,%ecx /* zero extend */ + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + cmpq $gs_change,RIP+8(%rsp) + je error_swapgs + jmp error_sti +KPROBE_END(error_entry) + + +/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +KPROBE_ENTRY(error_exit) + _frame R15 movl %ebx,%eax RESTORE_REST DISABLE_INTERRUPTS(CLBR_NONE) TRACE_IRQS_OFF GET_THREAD_INFO(%rcx) testl %eax,%eax - jne retint_kernel + jne retint_kernel LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - movl $_TIF_WORK_MASK,%edi - andl %edi,%edx - jnz retint_careful + movl TI_flags(%rcx),%edx + movl $_TIF_WORK_MASK,%edi + andl %edi,%edx + jnz retint_careful jmp retint_swapgs CFI_ENDPROC - -error_kernelspace: - incl %ebx - /* There are two places in the kernel that can potentially fault with - usergs. Handle them here. The exception handlers after - iret run with kernel gs again, so don't set the user space flag. - B stepping K8s sometimes report an truncated RIP for IRET - exceptions returning to compat mode. Check for these here too. */ - leaq irq_return(%rip),%rcx - cmpq %rcx,RIP(%rsp) - je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP(%rsp) - je error_swapgs - cmpq $gs_change,RIP(%rsp) - je error_swapgs - jmp error_sti -KPROBE_END(error_entry) +KPROBE_END(error_exit) /* Reload gs selector with exception handling */ /* edi: new selector */ From dcd072e26055de600cecdc3f7a1e083ecd55c2e4 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Thu, 20 Nov 2008 14:40:11 +0100 Subject: [PATCH 07/26] x86: clean up after: move entry_64.S register saving out of the macros This add-on patch to x86: move entry_64.S register saving out of the macros visually cleans up the appearance of the code by introducing some basic helper macro's. It also adds some cfi annotations which were missing. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 222 +++++++++++++++++++------------------ 1 file changed, 113 insertions(+), 109 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 5a12432ccdf..7a04f696121 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -60,6 +60,23 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 +/* + * Some macro's to hide the most frequently occuring CFI annotations. + */ + .macro CFI_PUSHQ reg + pushq \reg + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro CFI_POPQ reg + popq \reg + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro CFI_MOVQ reg offset=0 + movq %\reg, \offset(%rsp) + CFI_REL_OFFSET \reg, \offset + .endm #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE @@ -213,84 +230,84 @@ ENTRY(native_usergs_sysret64) CFI_ADJUST_CFA_OFFSET -(6*8) .endm - .macro CFI_DEFAULT_STACK start=1 - .if \start - CFI_STARTPROC simple - CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8 - .else - CFI_DEF_CFA_OFFSET SS+8 - .endif - CFI_REL_OFFSET r15,R15 - CFI_REL_OFFSET r14,R14 - CFI_REL_OFFSET r13,R13 - CFI_REL_OFFSET r12,R12 - CFI_REL_OFFSET rbp,RBP - CFI_REL_OFFSET rbx,RBX - CFI_REL_OFFSET r11,R11 - CFI_REL_OFFSET r10,R10 - CFI_REL_OFFSET r9,R9 - CFI_REL_OFFSET r8,R8 - CFI_REL_OFFSET rax,RAX - CFI_REL_OFFSET rcx,RCX - CFI_REL_OFFSET rdx,RDX - CFI_REL_OFFSET rsi,RSI - CFI_REL_OFFSET rdi,RDI - CFI_REL_OFFSET rip,RIP - /*CFI_REL_OFFSET cs,CS*/ - /*CFI_REL_OFFSET rflags,EFLAGS*/ - CFI_REL_OFFSET rsp,RSP - /*CFI_REL_OFFSET ss,SS*/ - .endm - /* - * initial frame state for interrupts and exceptions + * initial frame state for interrupts (and exceptions without error code) */ - .macro _frame ref + .macro EMPTY_FRAME start=1 offset=0 + .if \start CFI_STARTPROC simple CFI_SIGNAL_FRAME - CFI_DEF_CFA rsp,SS+8-\ref - /*CFI_REL_OFFSET ss,SS-\ref*/ - CFI_REL_OFFSET rsp,RSP-\ref - /*CFI_REL_OFFSET rflags,EFLAGS-\ref*/ - /*CFI_REL_OFFSET cs,CS-\ref*/ - CFI_REL_OFFSET rip,RIP-\ref + CFI_DEF_CFA rsp,8+\offset + .else + CFI_DEF_CFA_OFFSET 8+\offset + .endif .endm /* * initial frame state for interrupts (and exceptions without error code) */ -#define INTR_FRAME _frame RIP + .macro INTR_FRAME start=1 offset=0 + EMPTY_FRAME \start, (SS+8-RIP)+\offset + /*CFI_REL_OFFSET ss, SS-RIP+\offset*/ + CFI_REL_OFFSET rsp, RSP-RIP+\offset + /*CFI_REL_OFFSET rflags, EFLAGS-RIP+\offset*/ + /*CFI_REL_OFFSET cs, CS-RIP+\offset*/ + CFI_REL_OFFSET rip, RIP-RIP+\offset + .endm + /* * initial frame state for exceptions with error code (and interrupts * with vector already pushed) */ -#define XCPT_FRAME _frame ORIG_RAX + .macro XCPT_FRAME start=1 offset=0 + INTR_FRAME \start, (RIP-ORIG_RAX)+\offset + /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ + .endm + +/* + * frame that enables calling into C. + */ + .macro PARTIAL_FRAME start=1 offset=0 + XCPT_FRAME \start, (ORIG_RAX-ARGOFFSET)+\offset + CFI_REL_OFFSET rdi, (RDI-ARGOFFSET)+\offset + CFI_REL_OFFSET rsi, (RSI-ARGOFFSET)+\offset + CFI_REL_OFFSET rdx, (RDX-ARGOFFSET)+\offset + CFI_REL_OFFSET rcx, (RCX-ARGOFFSET)+\offset + CFI_REL_OFFSET rax, (RAX-ARGOFFSET)+\offset + CFI_REL_OFFSET r8, (R8-ARGOFFSET)+\offset + CFI_REL_OFFSET r9, (R9-ARGOFFSET)+\offset + CFI_REL_OFFSET r10, (R10-ARGOFFSET)+\offset + CFI_REL_OFFSET r11, (R11-ARGOFFSET)+\offset + .endm + +/* + * frame that enables passing a complete pt_regs to a C function. + */ + .macro DEFAULT_FRAME start=1 offset=0 + PARTIAL_FRAME \start, (R11-R15)+\offset + CFI_REL_OFFSET rbx, RBX+\offset + CFI_REL_OFFSET rbp, RBP+\offset + CFI_REL_OFFSET r12, R12+\offset + CFI_REL_OFFSET r13, R13+\offset + CFI_REL_OFFSET r14, R14+\offset + CFI_REL_OFFSET r15, R15+\offset + .endm /* save partial stack frame */ ENTRY(save_args) XCPT_FRAME cld - movq %rdi, 8*8+16(%rsp) - CFI_REL_OFFSET rdi, 8*8+16 - movq %rsi, 7*8+16(%rsp) - CFI_REL_OFFSET rsi, 7*8+16 - movq %rdx, 6*8+16(%rsp) - CFI_REL_OFFSET rdx, 6*8+16 - movq %rcx, 5*8+16(%rsp) - CFI_REL_OFFSET rcx, 5*8+16 - movq %rax, 4*8+16(%rsp) - CFI_REL_OFFSET rax, 4*8+16 - movq %r8, 3*8+16(%rsp) - CFI_REL_OFFSET r8, 3*8+16 - movq %r9, 2*8+16(%rsp) - CFI_REL_OFFSET r9, 2*8+16 - movq %r10, 1*8+16(%rsp) - CFI_REL_OFFSET r10, 1*8+16 - movq %r11, 0*8+16(%rsp) - CFI_REL_OFFSET r11, 0*8+16 + CFI_MOVQ rdi, (RDI-ARGOFFSET)+16 + CFI_MOVQ rsi, (RSI-ARGOFFSET)+16 + CFI_MOVQ rdx, (RDX-ARGOFFSET)+16 + CFI_MOVQ rcx, (RCX-ARGOFFSET)+16 + CFI_MOVQ rax, (RAX-ARGOFFSET)+16 + CFI_MOVQ r8, (R8-ARGOFFSET)+16 + CFI_MOVQ r9, (R9-ARGOFFSET)+16 + CFI_MOVQ r10, (R10-ARGOFFSET)+16 + CFI_MOVQ r11, (R11-ARGOFFSET)+16 leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ - movq %rbp, 8(%rsp) /* push %rbp */ + CFI_MOVQ rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) je 1f @@ -303,9 +320,10 @@ ENTRY(save_args) */ 1: incl %gs:pda_irqcount jne 2f - pop %rax /* move return address... */ + CFI_POPQ %rax /* move return address... */ mov %gs:pda_irqstackptr,%rsp - push %rax /* ... to the new stack */ + EMPTY_FRAME 0 + CFI_PUSHQ %rax /* ... to the new stack */ /* * We entered an interrupt context - irqs are off: */ @@ -319,7 +337,7 @@ END(save_args) */ /* rdi: prev */ ENTRY(ret_from_fork) - CFI_DEFAULT_STACK + DEFAULT_FRAME push kernel_eflags(%rip) CFI_ADJUST_CFA_OFFSET 8 popf # reset kernel eflags @@ -732,6 +750,7 @@ END(interrupt) subq $10*8, %rsp CFI_ADJUST_CFA_OFFSET 10*8 call save_args + PARTIAL_FRAME 0 call \func .endm @@ -949,11 +968,11 @@ END(spurious_interrupt) .macro zeroentry sym INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $-1 /* ORIG_RAX: no syscall to restart */ - CFI_ADJUST_CFA_OFFSET 8 + CFI_PUSHQ $-1 /* ORIG_RAX: no syscall to restart */ subq $15*8,%rsp CFI_ADJUST_CFA_OFFSET 15*8 call error_entry + DEFAULT_FRAME 0 movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ call \sym @@ -967,6 +986,7 @@ END(spurious_interrupt) subq $15*8,%rsp CFI_ADJUST_CFA_OFFSET 15*8 call error_entry + DEFAULT_FRAME 0 movq %rsp,%rdi /* pt_regs pointer */ movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ @@ -1079,40 +1099,25 @@ paranoid_schedule\trace: * returns in "no swapgs flag" in %ebx. */ KPROBE_ENTRY(error_entry) - _frame RDI + XCPT_FRAME CFI_ADJUST_CFA_OFFSET 15*8 /* oldrax contains error code */ cld - movq %rdi,14*8+8(%rsp) - CFI_REL_OFFSET rdi,RDI+8 - movq %rsi,13*8+8(%rsp) - CFI_REL_OFFSET rsi,RSI+8 - movq %rdx,12*8+8(%rsp) - CFI_REL_OFFSET rdx,RDX+8 - movq %rcx,11*8+8(%rsp) - CFI_REL_OFFSET rcx,RCX+8 - movq %rax,10*8+8(%rsp) - CFI_REL_OFFSET rax,RAX+8 - movq %r8, 9*8+8(%rsp) - CFI_REL_OFFSET r8,R8+8 - movq %r9, 8*8+8(%rsp) - CFI_REL_OFFSET r9,R9+8 - movq %r10,7*8+8(%rsp) - CFI_REL_OFFSET r10,R10+8 - movq %r11,6*8+8(%rsp) - CFI_REL_OFFSET r11,R11+8 - movq %rbx,5*8+8(%rsp) - CFI_REL_OFFSET rbx,RBX+8 - movq %rbp,4*8+8(%rsp) - CFI_REL_OFFSET rbp,RBP+8 - movq %r12,3*8+8(%rsp) - CFI_REL_OFFSET r12,R12+8 - movq %r13,2*8+8(%rsp) - CFI_REL_OFFSET r13,R13+8 - movq %r14,1*8+8(%rsp) - CFI_REL_OFFSET r14,R14+8 - movq %r15,0*8+8(%rsp) - CFI_REL_OFFSET r15,R15+8 + CFI_MOVQ rdi, RDI+8 + CFI_MOVQ rsi, RSI+8 + CFI_MOVQ rdx, RDX+8 + CFI_MOVQ rcx, RCX+8 + CFI_MOVQ rax, RAX+8 + CFI_MOVQ r8, R8+8 + CFI_MOVQ r9, R9+8 + CFI_MOVQ r10, R10+8 + CFI_MOVQ r11, R11+8 + CFI_MOVQ rbx, RBX+8 + CFI_MOVQ rbp, RBP+8 + CFI_MOVQ r12, R12+8 + CFI_MOVQ r13, R13+8 + CFI_MOVQ r14, R14+8 + CFI_MOVQ r15, R15+8 xorl %ebx,%ebx testl $3,CS+8(%rsp) je error_kernelspace @@ -1146,7 +1151,7 @@ KPROBE_END(error_entry) /* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ KPROBE_ENTRY(error_exit) - _frame R15 + DEFAULT_FRAME movl %ebx,%eax RESTORE_REST DISABLE_INTERRUPTS(CLBR_NONE) @@ -1455,7 +1460,7 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) see the correct pointer to the pt_regs */ movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC - CFI_DEFAULT_STACK + DEFAULT_FRAME 11: incl %gs:pda_irqcount movq %rsp,%rbp CFI_DEF_CFA_REGISTER rbp @@ -1483,10 +1488,13 @@ END(do_hypervisor_callback) # with its current contents: any discrepancy means we in category 1. */ ENTRY(xen_failsafe_callback) - framesz = (RIP-0x30) /* workaround buggy gas */ - _frame framesz - CFI_REL_OFFSET rcx, 0 - CFI_REL_OFFSET r11, 8 + INTR_FRAME 1 (6*8) + /*CFI_REL_OFFSET gs,GS*/ + /*CFI_REL_OFFSET fs,FS*/ + /*CFI_REL_OFFSET es,ES*/ + /*CFI_REL_OFFSET ds,DS*/ + CFI_REL_OFFSET r11,8 + CFI_REL_OFFSET rcx,0 movw %ds,%cx cmpw %cx,0x10(%rsp) CFI_REMEMBER_STATE @@ -1507,12 +1515,9 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq $0 - CFI_ADJUST_CFA_OFFSET 8 - pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 - pushq %rcx - CFI_ADJUST_CFA_OFFSET 8 + CFI_PUSHQ $0 /* RIP */ + CFI_PUSHQ %r11 + CFI_PUSHQ %rcx jmp general_protection CFI_RESTORE_STATE 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ @@ -1522,8 +1527,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - pushq $0 - CFI_ADJUST_CFA_OFFSET 8 + CFI_PUSHQ $0 SAVE_ALL jmp error_exit CFI_ENDPROC From e8a0e27662186f8856a0a6242e7a8386c9a64a53 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Nov 2008 15:11:32 +0100 Subject: [PATCH 08/26] x86: clean up after: move entry_64.S register saving out of the macros, fix Impact: build fix The break builds with older binutils (2.16.1): arch/x86/kernel/entry_64.S: Assembler messages: arch/x86/kernel/entry_64.S:282: Error: too many positional arguments arch/x86/kernel/entry_64.S:283: Error: too many positional arguments arch/x86/kernel/entry_64.S:284: Error: too many positional arguments arch/x86/kernel/entry_64.S:285: Error: too many positional arguments arch/x86/kernel/entry_64.S:286: Error: too many positional arguments arch/x86/kernel/entry_64.S:287: Error: too many positional arguments arch/x86/kernel/entry_64.S:288: Error: too many positional arguments arch/x86/kernel/entry_64.S:289: Error: too many positional arguments arch/x86/kernel/entry_64.S:290: Error: too many positional arguments Took some time to figure out the detail that GAS chokes on: it's negative offsets. Rearrange the calculations to make sure we never go negative. Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 54 +++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 7a04f696121..4e3d83678f8 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -247,12 +247,12 @@ ENTRY(native_usergs_sysret64) * initial frame state for interrupts (and exceptions without error code) */ .macro INTR_FRAME start=1 offset=0 - EMPTY_FRAME \start, (SS+8-RIP)+\offset - /*CFI_REL_OFFSET ss, SS-RIP+\offset*/ - CFI_REL_OFFSET rsp, RSP-RIP+\offset - /*CFI_REL_OFFSET rflags, EFLAGS-RIP+\offset*/ - /*CFI_REL_OFFSET cs, CS-RIP+\offset*/ - CFI_REL_OFFSET rip, RIP-RIP+\offset + EMPTY_FRAME \start, SS+8+\offset-RIP + /*CFI_REL_OFFSET ss, SS+\offset-RIP*/ + CFI_REL_OFFSET rsp, RSP+\offset-RIP + /*CFI_REL_OFFSET rflags, EFLAGS+\offset-RIP*/ + /*CFI_REL_OFFSET cs, CS+\offset-RIP*/ + CFI_REL_OFFSET rip, RIP+\offset-RIP .endm /* @@ -260,7 +260,7 @@ ENTRY(native_usergs_sysret64) * with vector already pushed) */ .macro XCPT_FRAME start=1 offset=0 - INTR_FRAME \start, (RIP-ORIG_RAX)+\offset + INTR_FRAME \start, RIP+\offset-ORIG_RAX /*CFI_REL_OFFSET orig_rax, ORIG_RAX-ORIG_RAX*/ .endm @@ -268,23 +268,23 @@ ENTRY(native_usergs_sysret64) * frame that enables calling into C. */ .macro PARTIAL_FRAME start=1 offset=0 - XCPT_FRAME \start, (ORIG_RAX-ARGOFFSET)+\offset - CFI_REL_OFFSET rdi, (RDI-ARGOFFSET)+\offset - CFI_REL_OFFSET rsi, (RSI-ARGOFFSET)+\offset - CFI_REL_OFFSET rdx, (RDX-ARGOFFSET)+\offset - CFI_REL_OFFSET rcx, (RCX-ARGOFFSET)+\offset - CFI_REL_OFFSET rax, (RAX-ARGOFFSET)+\offset - CFI_REL_OFFSET r8, (R8-ARGOFFSET)+\offset - CFI_REL_OFFSET r9, (R9-ARGOFFSET)+\offset - CFI_REL_OFFSET r10, (R10-ARGOFFSET)+\offset - CFI_REL_OFFSET r11, (R11-ARGOFFSET)+\offset + XCPT_FRAME \start, ORIG_RAX+\offset-ARGOFFSET + CFI_REL_OFFSET rdi, RDI+\offset-ARGOFFSET + CFI_REL_OFFSET rsi, RSI+\offset-ARGOFFSET + CFI_REL_OFFSET rdx, RDX+\offset-ARGOFFSET + CFI_REL_OFFSET rcx, RCX+\offset-ARGOFFSET + CFI_REL_OFFSET rax, RAX+\offset-ARGOFFSET + CFI_REL_OFFSET r8, R8+\offset-ARGOFFSET + CFI_REL_OFFSET r9, R9+\offset-ARGOFFSET + CFI_REL_OFFSET r10, R10+\offset-ARGOFFSET + CFI_REL_OFFSET r11, R11+\offset-ARGOFFSET .endm /* * frame that enables passing a complete pt_regs to a C function. */ .macro DEFAULT_FRAME start=1 offset=0 - PARTIAL_FRAME \start, (R11-R15)+\offset + PARTIAL_FRAME \start, R11+\offset-R15 CFI_REL_OFFSET rbx, RBX+\offset CFI_REL_OFFSET rbp, RBP+\offset CFI_REL_OFFSET r12, R12+\offset @@ -297,15 +297,15 @@ ENTRY(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - CFI_MOVQ rdi, (RDI-ARGOFFSET)+16 - CFI_MOVQ rsi, (RSI-ARGOFFSET)+16 - CFI_MOVQ rdx, (RDX-ARGOFFSET)+16 - CFI_MOVQ rcx, (RCX-ARGOFFSET)+16 - CFI_MOVQ rax, (RAX-ARGOFFSET)+16 - CFI_MOVQ r8, (R8-ARGOFFSET)+16 - CFI_MOVQ r9, (R9-ARGOFFSET)+16 - CFI_MOVQ r10, (R10-ARGOFFSET)+16 - CFI_MOVQ r11, (R11-ARGOFFSET)+16 + CFI_MOVQ rdi, RDI+16-ARGOFFSET + CFI_MOVQ rsi, RSI+16-ARGOFFSET + CFI_MOVQ rdx, RDX+16-ARGOFFSET + CFI_MOVQ rcx, RCX+16-ARGOFFSET + CFI_MOVQ rax, RAX+16-ARGOFFSET + CFI_MOVQ r8, R8+16-ARGOFFSET + CFI_MOVQ r9, R9+16-ARGOFFSET + CFI_MOVQ r10, R10+16-ARGOFFSET + CFI_MOVQ r11, R11+16-ARGOFFSET leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ CFI_MOVQ rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ From 14ae22ba2b8bb3d53fb795f9b8074aa39ef7b6cd Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 21 Nov 2008 15:20:47 +0100 Subject: [PATCH 09/26] x86: entry_64.S: rename Impact: cleanup Rename: CFI_PUSHQ => pushq_cfi CFI_POPQ => popq_cfi CFI_MOVQ => movq_cfi To make it blend better into regular assembly code. Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 71 +++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 4e3d83678f8..92c5e18340d 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -63,17 +63,17 @@ /* * Some macro's to hide the most frequently occuring CFI annotations. */ - .macro CFI_PUSHQ reg + .macro pushq_cfi reg pushq \reg CFI_ADJUST_CFA_OFFSET 8 .endm - .macro CFI_POPQ reg + .macro popq_cfi reg popq \reg CFI_ADJUST_CFA_OFFSET -8 .endm - .macro CFI_MOVQ reg offset=0 + .macro movq_cfi reg offset=0 movq %\reg, \offset(%rsp) CFI_REL_OFFSET \reg, \offset .endm @@ -297,17 +297,18 @@ ENTRY(native_usergs_sysret64) ENTRY(save_args) XCPT_FRAME cld - CFI_MOVQ rdi, RDI+16-ARGOFFSET - CFI_MOVQ rsi, RSI+16-ARGOFFSET - CFI_MOVQ rdx, RDX+16-ARGOFFSET - CFI_MOVQ rcx, RCX+16-ARGOFFSET - CFI_MOVQ rax, RAX+16-ARGOFFSET - CFI_MOVQ r8, R8+16-ARGOFFSET - CFI_MOVQ r9, R9+16-ARGOFFSET - CFI_MOVQ r10, R10+16-ARGOFFSET - CFI_MOVQ r11, R11+16-ARGOFFSET + movq_cfi rdi, RDI+16-ARGOFFSET + movq_cfi rsi, RSI+16-ARGOFFSET + movq_cfi rdx, RDX+16-ARGOFFSET + movq_cfi rcx, RCX+16-ARGOFFSET + movq_cfi rax, RAX+16-ARGOFFSET + movq_cfi r8, R8+16-ARGOFFSET + movq_cfi r9, R9+16-ARGOFFSET + movq_cfi r10, R10+16-ARGOFFSET + movq_cfi r11, R11+16-ARGOFFSET + leaq -ARGOFFSET+16(%rsp),%rdi /* arg1 for handler */ - CFI_MOVQ rbp, 8 /* push %rbp */ + movq_cfi rbp, 8 /* push %rbp */ leaq 8(%rsp), %rbp /* mov %rsp, %ebp */ testl $3, CS(%rdi) je 1f @@ -320,10 +321,10 @@ ENTRY(save_args) */ 1: incl %gs:pda_irqcount jne 2f - CFI_POPQ %rax /* move return address... */ + popq_cfi %rax /* move return address... */ mov %gs:pda_irqstackptr,%rsp EMPTY_FRAME 0 - CFI_PUSHQ %rax /* ... to the new stack */ + pushq_cfi %rax /* ... to the new stack */ /* * We entered an interrupt context - irqs are off: */ @@ -968,7 +969,7 @@ END(spurious_interrupt) .macro zeroentry sym INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - CFI_PUSHQ $-1 /* ORIG_RAX: no syscall to restart */ + pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ subq $15*8,%rsp CFI_ADJUST_CFA_OFFSET 15*8 call error_entry @@ -1103,21 +1104,21 @@ KPROBE_ENTRY(error_entry) CFI_ADJUST_CFA_OFFSET 15*8 /* oldrax contains error code */ cld - CFI_MOVQ rdi, RDI+8 - CFI_MOVQ rsi, RSI+8 - CFI_MOVQ rdx, RDX+8 - CFI_MOVQ rcx, RCX+8 - CFI_MOVQ rax, RAX+8 - CFI_MOVQ r8, R8+8 - CFI_MOVQ r9, R9+8 - CFI_MOVQ r10, R10+8 - CFI_MOVQ r11, R11+8 - CFI_MOVQ rbx, RBX+8 - CFI_MOVQ rbp, RBP+8 - CFI_MOVQ r12, R12+8 - CFI_MOVQ r13, R13+8 - CFI_MOVQ r14, R14+8 - CFI_MOVQ r15, R15+8 + movq_cfi rdi, RDI+8 + movq_cfi rsi, RSI+8 + movq_cfi rdx, RDX+8 + movq_cfi rcx, RCX+8 + movq_cfi rax, RAX+8 + movq_cfi r8, R8+8 + movq_cfi r9, R9+8 + movq_cfi r10, R10+8 + movq_cfi r11, R11+8 + movq_cfi rbx, RBX+8 + movq_cfi rbp, RBP+8 + movq_cfi r12, R12+8 + movq_cfi r13, R13+8 + movq_cfi r14, R14+8 + movq_cfi r15, R15+8 xorl %ebx,%ebx testl $3,CS+8(%rsp) je error_kernelspace @@ -1515,9 +1516,9 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - CFI_PUSHQ $0 /* RIP */ - CFI_PUSHQ %r11 - CFI_PUSHQ %rcx + pushq_cfi $0 /* RIP */ + pushq_cfi %r11 + pushq_cfi %rcx jmp general_protection CFI_RESTORE_STATE 1: /* Segment mismatch => Category 1 (Bad segment). Retry the IRET. */ @@ -1527,7 +1528,7 @@ ENTRY(xen_failsafe_callback) CFI_RESTORE r11 addq $0x30,%rsp CFI_ADJUST_CFA_OFFSET -0x30 - CFI_PUSHQ $0 + pushq_cfi $0 SAVE_ALL jmp error_exit CFI_ENDPROC From c002a1e6b6b6f07ae04e68987054bf1f2150ae48 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 21 Nov 2008 16:41:55 +0100 Subject: [PATCH 10/26] x86: introduce save_rest and restructure the PTREGSCALL macro in entry_64.S Impact: cleanup The save_rest function completes a partial stack frame for use by the PTREGSCALL macro. This also avoids the indirect call in PTREGSCALLs. This adds the macro movq_cfi_restore to hide the CFI_RESTORE annotation when restoring a register from the stack frame. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 91 ++++++++++++++++++++++---------------- 1 file changed, 53 insertions(+), 38 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 92c5e18340d..ef95c45b926 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -78,6 +78,11 @@ CFI_REL_OFFSET \reg, \offset .endm + .macro movq_cfi_restore offset reg + movq \offset(%rsp), %\reg + CFI_RESTORE \reg + .endm + #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) @@ -186,21 +191,21 @@ ENTRY(native_usergs_sysret64) */ /* %rsp:at FRAMEEND */ - .macro FIXUP_TOP_OF_STACK tmp - movq %gs:pda_oldrsp,\tmp - movq \tmp,RSP(%rsp) - movq $__USER_DS,SS(%rsp) - movq $__USER_CS,CS(%rsp) - movq $-1,RCX(%rsp) - movq R11(%rsp),\tmp /* get eflags */ - movq \tmp,EFLAGS(%rsp) + .macro FIXUP_TOP_OF_STACK tmp offset=0 + movq %gs:pda_oldrsp,\tmp + movq \tmp,RSP+\offset(%rsp) + movq $__USER_DS,SS+\offset(%rsp) + movq $__USER_CS,CS+\offset(%rsp) + movq $-1,RCX+\offset(%rsp) + movq R11+\offset(%rsp),\tmp /* get eflags */ + movq \tmp,EFLAGS+\offset(%rsp) .endm - .macro RESTORE_TOP_OF_STACK tmp,offset=0 - movq RSP-\offset(%rsp),\tmp - movq \tmp,%gs:pda_oldrsp - movq EFLAGS-\offset(%rsp),\tmp - movq \tmp,R11-\offset(%rsp) + .macro RESTORE_TOP_OF_STACK tmp offset=0 + movq RSP+\offset(%rsp),\tmp + movq \tmp,%gs:pda_oldrsp + movq EFLAGS+\offset(%rsp),\tmp + movq \tmp,R11+\offset(%rsp) .endm .macro FAKE_STACK_FRAME child_rip @@ -333,6 +338,21 @@ ENTRY(save_args) CFI_ENDPROC END(save_args) +ENTRY(save_rest) + PARTIAL_FRAME 1 REST_SKIP+8 + movq 5*8+16(%rsp), %r11 /* save return address */ + movq_cfi rbx, RBX+16 + movq_cfi rbp, RBP+16 + movq_cfi r12, R12+16 + movq_cfi r13, R13+16 + movq_cfi r14, R14+16 + movq_cfi r15, R15+16 + movq %r11, 8(%rsp) /* return address */ + FIXUP_TOP_OF_STACK %r11, 16 + ret + CFI_ENDPROC +END(save_rest) + /* * A newly forked process directly context switches into this. */ @@ -353,7 +373,7 @@ rff_action: je int_ret_from_sys_call testl $_TIF_IA32,TI_flags(%rcx) jnz int_ret_from_sys_call - RESTORE_TOP_OF_STACK %rdi,ARGOFFSET + RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET jmp ret_from_sys_call rff_trace: movq %rsp,%rdi @@ -626,18 +646,20 @@ END(system_call) /* * Certain special system calls that need to save a complete full stack frame. */ - .macro PTREGSCALL label,func,arg - .globl \label -\label: - leaq \func(%rip),%rax - leaq -ARGOFFSET+8(%rsp),\arg /* 8 for return address */ - jmp ptregscall_common +ENTRY(\label) + PARTIAL_FRAME 1 8 /* offset 8: return address */ + subq $REST_SKIP, %rsp + CFI_ADJUST_CFA_OFFSET REST_SKIP + call save_rest + DEFAULT_FRAME 0 8 /* offset 8: return address */ + leaq 8(%rsp), \arg /* pt_regs pointer */ + call \func + jmp ptregscall_common + CFI_ENDPROC END(\label) .endm - CFI_STARTPROC - PTREGSCALL stub_clone, sys_clone, %r8 PTREGSCALL stub_fork, sys_fork, %rdi PTREGSCALL stub_vfork, sys_vfork, %rdi @@ -645,22 +667,15 @@ END(\label) PTREGSCALL stub_iopl, sys_iopl, %rsi ENTRY(ptregscall_common) - popq %r11 - CFI_ADJUST_CFA_OFFSET -8 - CFI_REGISTER rip, r11 - SAVE_REST - movq %r11, %r15 - CFI_REGISTER rip, r15 - FIXUP_TOP_OF_STACK %r11 - call *%rax - RESTORE_TOP_OF_STACK %r11 - movq %r15, %r11 - CFI_REGISTER rip, r11 - RESTORE_REST - pushq %r11 - CFI_ADJUST_CFA_OFFSET 8 - CFI_REL_OFFSET rip, 0 - ret + DEFAULT_FRAME 1 8 /* offset 8: return address */ + RESTORE_TOP_OF_STACK %r11, 8 + movq_cfi_restore R15+8, r15 + movq_cfi_restore R14+8, r14 + movq_cfi_restore R13+8, r13 + movq_cfi_restore R12+8, r12 + movq_cfi_restore RBP+8, rbp + movq_cfi_restore RBX+8, rbx + ret $REST_SKIP /* pop extended registers */ CFI_ENDPROC END(ptregscall_common) From e2f6bc25b98dbb10d809ee50262b43fcae67840a Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 21 Nov 2008 16:43:18 +0100 Subject: [PATCH 11/26] x86: entry_64.S: factor out save_paranoid and paranoid_exit Impact: cleanup, shrink kernel image size Also expand the paranoid_exit0 macro into nmi_exit inside the nmi stub in the case of enabled irq-tracing. This gives a few hundred bytes code size reduction. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 151 +++++++++++++++++++++++++------------ 1 file changed, 102 insertions(+), 49 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index ef95c45b926..fad777b1136 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -353,6 +353,36 @@ ENTRY(save_rest) CFI_ENDPROC END(save_rest) +/* save complete stack frame */ +ENTRY(save_paranoid) + XCPT_FRAME 1 RDI+8 + cld + movq_cfi rdi, RDI+8 + movq_cfi rsi, RSI+8 + movq_cfi rdx, RDX+8 + movq_cfi rcx, RCX+8 + movq_cfi rax, RAX+8 + movq_cfi r8, R8+8 + movq_cfi r9, R9+8 + movq_cfi r10, R10+8 + movq_cfi r11, R11+8 + movq_cfi rbx, RBX+8 + movq_cfi rbp, RBP+8 + movq_cfi r12, R12+8 + movq_cfi r13, R13+8 + movq_cfi r14, R14+8 + movq_cfi r15, R15+8 + movl $1,%ebx + movl $MSR_GS_BASE,%ecx + rdmsr + testl %edx,%edx + js 1f /* negative -> in kernel */ + SWAPGS + xorl %ebx,%ebx +1: ret + CFI_ENDPROC +END(save_paranoid) + /* * A newly forked process directly context switches into this. */ @@ -1012,24 +1042,15 @@ END(spurious_interrupt) .endm /* error code is on the stack already */ - /* handle NMI like exceptions that can happen everywhere */ - .macro paranoidentry sym, ist=0, irqtrace=1 - SAVE_ALL - cld - movl $1,%ebx - movl $MSR_GS_BASE,%ecx - rdmsr - testl %edx,%edx - js 1f - SWAPGS - xorl %ebx,%ebx -1: + .macro paranoidentry sym ist=0 + subq $15*8, %rsp + CFI_ADJUST_CFA_OFFSET 15*8 + call save_paranoid + DEFAULT_FRAME 0 .if \ist movq %gs:pda_data_offset, %rbp .endif - .if \irqtrace TRACE_IRQS_OFF - .endif movq %rsp,%rdi movq ORIG_RAX(%rsp),%rsi movq $-1,ORIG_RAX(%rsp) @@ -1041,9 +1062,7 @@ END(spurious_interrupt) addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) .endif DISABLE_INTERRUPTS(CLBR_NONE) - .if \irqtrace TRACE_IRQS_OFF - .endif .endm /* @@ -1058,57 +1077,48 @@ END(spurious_interrupt) * is fundamentally NMI-unsafe. (we cannot change the soft and * hard flags at once, atomically) */ - .macro paranoidexit trace=1 + /* ebx: no swapgs flag */ -paranoid_exit\trace: +KPROBE_ENTRY(paranoid_exit) + INTR_FRAME testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_restore\trace + jnz paranoid_restore testl $3,CS(%rsp) - jnz paranoid_userspace\trace -paranoid_swapgs\trace: - .if \trace + jnz paranoid_userspace +paranoid_swapgs: TRACE_IRQS_IRETQ 0 - .endif SWAPGS_UNSAFE_STACK -paranoid_restore\trace: +paranoid_restore: RESTORE_ALL 8 jmp irq_return -paranoid_userspace\trace: +paranoid_userspace: GET_THREAD_INFO(%rcx) movl TI_flags(%rcx),%ebx andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs\trace + jz paranoid_swapgs movq %rsp,%rdi /* &pt_regs */ call sync_regs movq %rax,%rsp /* switch stack for scheduling */ testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule\trace + jnz paranoid_schedule movl %ebx,%edx /* arg3: thread flags */ - .if \trace TRACE_IRQS_ON - .endif ENABLE_INTERRUPTS(CLBR_NONE) xorl %esi,%esi /* arg2: oldset */ movq %rsp,%rdi /* arg1: &pt_regs */ call do_notify_resume DISABLE_INTERRUPTS(CLBR_NONE) - .if \trace TRACE_IRQS_OFF - .endif - jmp paranoid_userspace\trace -paranoid_schedule\trace: - .if \trace + jmp paranoid_userspace +paranoid_schedule: TRACE_IRQS_ON - .endif ENABLE_INTERRUPTS(CLBR_ANY) call schedule DISABLE_INTERRUPTS(CLBR_ANY) - .if \trace TRACE_IRQS_OFF - .endif - jmp paranoid_userspace\trace + jmp paranoid_userspace CFI_ENDPROC - .endm +END(paranoid_exit) /* * Exception entry point. This expects an error code/orig_rax on the stack. @@ -1326,20 +1336,63 @@ KPROBE_ENTRY(debug) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_debug, DEBUG_STACK - paranoidexit + jmp paranoid_exit + CFI_ENDPROC KPROBE_END(debug) /* runs on exception stack */ KPROBE_ENTRY(nmi) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $-1 - CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_nmi, 0, 0 + pushq_cfi $-1 + subq $15*8, %rsp + CFI_ADJUST_CFA_OFFSET 15*8 + call save_paranoid + DEFAULT_FRAME 0 + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + movq %rsp,%rdi + movq ORIG_RAX(%rsp),%rsi + movq $-1,ORIG_RAX(%rsp) + call do_nmi + DISABLE_INTERRUPTS(CLBR_NONE) #ifdef CONFIG_TRACE_IRQFLAGS - paranoidexit 0 + /* paranoidexit; without TRACE_IRQS_OFF */ + /* ebx: no swapgs flag */ +nmi_exit: + testl %ebx,%ebx /* swapgs needed? */ + jnz nmi_restore + testl $3,CS(%rsp) + jnz nmi_userspace +nmi_swapgs: + SWAPGS_UNSAFE_STACK +nmi_restore: + RESTORE_ALL 8 + jmp irq_return +nmi_userspace: + GET_THREAD_INFO(%rcx) + movl TI_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz nmi_swapgs + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz nmi_schedule + movl %ebx,%edx /* arg3: thread flags */ + ENABLE_INTERRUPTS(CLBR_NONE) + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + DISABLE_INTERRUPTS(CLBR_NONE) + jmp nmi_userspace +nmi_schedule: + ENABLE_INTERRUPTS(CLBR_ANY) + call schedule + DISABLE_INTERRUPTS(CLBR_ANY) + jmp nmi_userspace + CFI_ENDPROC #else - jmp paranoid_exit1 + jmp paranoid_exit CFI_ENDPROC #endif KPROBE_END(nmi) @@ -1350,7 +1403,7 @@ KPROBE_ENTRY(int3) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_int3, DEBUG_STACK - jmp paranoid_exit1 + jmp paranoid_exit CFI_ENDPROC KPROBE_END(int3) @@ -1375,7 +1428,7 @@ ENTRY(double_fault) XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_double_fault - jmp paranoid_exit1 + jmp paranoid_exit CFI_ENDPROC END(double_fault) @@ -1392,7 +1445,7 @@ ENTRY(stack_segment) XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME paranoidentry do_stack_segment - jmp paranoid_exit1 + jmp paranoid_exit CFI_ENDPROC END(stack_segment) @@ -1420,7 +1473,7 @@ ENTRY(machine_check) pushq $0 CFI_ADJUST_CFA_OFFSET 8 paranoidentry do_machine_check - jmp paranoid_exit1 + jmp paranoid_exit CFI_ENDPROC END(machine_check) #endif From b8b1d08bf6fe7c09e6cb2294bc0e5e964b361241 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 21 Nov 2008 16:44:28 +0100 Subject: [PATCH 12/26] x86: entry_64.S: split out some macro's and move common code to paranoid_exit Impact: cleanup DISABLE_INTERRUPTS(CLBR_NONE)/TRACE_IRQS_OFF is now always executed just before paranoid_exit. Move it there. Split out paranoidzeroentry, paranoiderrorentry, and paranoidzeroentry_ist to get more readable macro's. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 102 ++++++++++++++++++------------------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index fad777b1136..692c1da6190 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1026,6 +1026,39 @@ END(spurious_interrupt) CFI_ENDPROC .endm + .macro paranoidzeroentry sym + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME + pushq $-1 /* ORIG_RAX: no syscall to restart */ + CFI_ADJUST_CFA_OFFSET 8 + subq $15*8, %rsp + call save_paranoid + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ + call \sym + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + .endm + + .macro paranoidzeroentry_ist sym ist + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME + pushq $-1 /* ORIG_RAX: no syscall to restart */ + CFI_ADJUST_CFA_OFFSET 8 + subq $15*8, %rsp + call save_paranoid + TRACE_IRQS_OFF + movq %rsp,%rdi /* pt_regs pointer */ + xorl %esi,%esi /* no error code */ + movq %gs:pda_data_offset, %rbp + subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + call \sym + addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC + .endm + .macro errorentry sym XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME @@ -1042,27 +1075,20 @@ END(spurious_interrupt) .endm /* error code is on the stack already */ - .macro paranoidentry sym ist=0 - subq $15*8, %rsp + .macro paranoiderrorentry sym + XCPT_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME + subq $15*8,%rsp CFI_ADJUST_CFA_OFFSET 15*8 call save_paranoid DEFAULT_FRAME 0 - .if \ist - movq %gs:pda_data_offset, %rbp - .endif TRACE_IRQS_OFF - movq %rsp,%rdi - movq ORIG_RAX(%rsp),%rsi - movq $-1,ORIG_RAX(%rsp) - .if \ist - subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) - .endif + movq %rsp,%rdi /* pt_regs pointer */ + movq ORIG_RAX(%rsp),%rsi /* get error code */ + movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ call \sym - .if \ist - addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) - .endif - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF + jmp paranoid_exit /* %ebx: no swapgs flag */ + CFI_ENDPROC .endm /* @@ -1081,6 +1107,8 @@ END(spurious_interrupt) /* ebx: no swapgs flag */ KPROBE_ENTRY(paranoid_exit) INTR_FRAME + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF testl %ebx,%ebx /* swapgs needed? */ jnz paranoid_restore testl $3,CS(%rsp) @@ -1331,13 +1359,7 @@ END(device_not_available) /* runs on exception stack */ KPROBE_ENTRY(debug) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $0 - CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_debug, DEBUG_STACK - jmp paranoid_exit - CFI_ENDPROC + paranoidzeroentry_ist do_debug, DEBUG_STACK KPROBE_END(debug) /* runs on exception stack */ @@ -1351,14 +1373,12 @@ KPROBE_ENTRY(nmi) DEFAULT_FRAME 0 /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ movq %rsp,%rdi - movq ORIG_RAX(%rsp),%rsi - movq $-1,ORIG_RAX(%rsp) + movq $-1,%rsi call do_nmi - DISABLE_INTERRUPTS(CLBR_NONE) #ifdef CONFIG_TRACE_IRQFLAGS /* paranoidexit; without TRACE_IRQS_OFF */ /* ebx: no swapgs flag */ -nmi_exit: + DISABLE_INTERRUPTS(CLBR_NONE) testl %ebx,%ebx /* swapgs needed? */ jnz nmi_restore testl $3,CS(%rsp) @@ -1398,13 +1418,7 @@ nmi_schedule: KPROBE_END(nmi) KPROBE_ENTRY(int3) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $0 - CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_int3, DEBUG_STACK - jmp paranoid_exit - CFI_ENDPROC + paranoidzeroentry_ist do_int3, DEBUG_STACK KPROBE_END(int3) ENTRY(overflow) @@ -1425,11 +1439,7 @@ END(coprocessor_segment_overrun) /* runs on exception stack */ ENTRY(double_fault) - XCPT_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - paranoidentry do_double_fault - jmp paranoid_exit - CFI_ENDPROC + paranoiderrorentry do_double_fault END(double_fault) ENTRY(invalid_TSS) @@ -1442,11 +1452,7 @@ END(segment_not_present) /* runs on exception stack */ ENTRY(stack_segment) - XCPT_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - paranoidentry do_stack_segment - jmp paranoid_exit - CFI_ENDPROC + paranoiderrorentry do_stack_segment END(stack_segment) KPROBE_ENTRY(general_protection) @@ -1468,13 +1474,7 @@ END(spurious_interrupt_bug) #ifdef CONFIG_X86_MCE /* runs on exception stack */ ENTRY(machine_check) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq $0 - CFI_ADJUST_CFA_OFFSET 8 - paranoidentry do_machine_check - jmp paranoid_exit - CFI_ENDPROC + paranoidzeroentry do_machine_check END(machine_check) #endif From c81084114f6ff957bc6b5a0048350479c1c1f7b3 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Fri, 21 Nov 2008 22:59:52 +0100 Subject: [PATCH 13/26] x86: split out some macro's and move common code to paranoid_exit, fix Impact: fix bootup crash Even though it tested fine for me, there was still a bug in the first patch: I have overlooked a call to ptregscall_common. This patch fixes that, I think, but the code is never executed for me while running a debian install... (I tested this by putting an "1:jmp 1b" in there.) Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 692c1da6190..e5ddf573ded 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -529,10 +529,13 @@ sysret_signal: jc sysret_audit #endif /* edx: work flags (arg3) */ - leaq do_notify_resume(%rip),%rax leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 xorl %esi,%esi # oldset -> arg2 - call ptregscall_common + SAVE_REST + FIXUP_TOP_OF_STACK %r11 + call do_notify_resume + RESTORE_TOP_OF_STACK %r11 + RESTORE_REST movl $_TIF_WORK_MASK,%edi /* Use IRET because user could have changed frame. This works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ From 8a2503fa4a6fae8ee42140b339f37373fc6acaae Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 23 Nov 2008 14:53:43 +0300 Subject: [PATCH 14/26] x86: move dwarf2 related macro to dwarf2.h Impact: cleanup Move recently introduced dwarf2 macros to dwarf2.h file. It allow us to not duplicate them in assembly files. Active usage of _cfi macros don't make assembly files more obvious to understand but we already have a lot of macros there which requires to search the definitions of them *anyway*. But at least it make every cfi usage one line shorter. Also some code alignment is done. Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/include/asm/dwarf2.h | 97 ++++++++++++++++++++++++----------- arch/x86/kernel/entry_64.S | 23 --------- 2 files changed, 66 insertions(+), 54 deletions(-) diff --git a/arch/x86/include/asm/dwarf2.h b/arch/x86/include/asm/dwarf2.h index 804b6e6be92..3afc5e87cfd 100644 --- a/arch/x86/include/asm/dwarf2.h +++ b/arch/x86/include/asm/dwarf2.h @@ -6,56 +6,91 @@ #endif /* - Macros for dwarf2 CFI unwind table entries. - See "as.info" for details on these pseudo ops. Unfortunately - they are only supported in very new binutils, so define them - away for older version. + * Macros for dwarf2 CFI unwind table entries. + * See "as.info" for details on these pseudo ops. Unfortunately + * they are only supported in very new binutils, so define them + * away for older version. */ #ifdef CONFIG_AS_CFI -#define CFI_STARTPROC .cfi_startproc -#define CFI_ENDPROC .cfi_endproc -#define CFI_DEF_CFA .cfi_def_cfa -#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register -#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset -#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset -#define CFI_OFFSET .cfi_offset -#define CFI_REL_OFFSET .cfi_rel_offset -#define CFI_REGISTER .cfi_register -#define CFI_RESTORE .cfi_restore -#define CFI_REMEMBER_STATE .cfi_remember_state -#define CFI_RESTORE_STATE .cfi_restore_state -#define CFI_UNDEFINED .cfi_undefined +#define CFI_STARTPROC .cfi_startproc +#define CFI_ENDPROC .cfi_endproc +#define CFI_DEF_CFA .cfi_def_cfa +#define CFI_DEF_CFA_REGISTER .cfi_def_cfa_register +#define CFI_DEF_CFA_OFFSET .cfi_def_cfa_offset +#define CFI_ADJUST_CFA_OFFSET .cfi_adjust_cfa_offset +#define CFI_OFFSET .cfi_offset +#define CFI_REL_OFFSET .cfi_rel_offset +#define CFI_REGISTER .cfi_register +#define CFI_RESTORE .cfi_restore +#define CFI_REMEMBER_STATE .cfi_remember_state +#define CFI_RESTORE_STATE .cfi_restore_state +#define CFI_UNDEFINED .cfi_undefined #ifdef CONFIG_AS_CFI_SIGNAL_FRAME -#define CFI_SIGNAL_FRAME .cfi_signal_frame +#define CFI_SIGNAL_FRAME .cfi_signal_frame #else #define CFI_SIGNAL_FRAME #endif #else -/* Due to the structure of pre-exisiting code, don't use assembler line - comment character # to ignore the arguments. Instead, use a dummy macro. */ +/* + * Due to the structure of pre-exisiting code, don't use assembler line + * comment character # to ignore the arguments. Instead, use a dummy macro. + */ .macro cfi_ignore a=0, b=0, c=0, d=0 .endm -#define CFI_STARTPROC cfi_ignore -#define CFI_ENDPROC cfi_ignore -#define CFI_DEF_CFA cfi_ignore +#define CFI_STARTPROC cfi_ignore +#define CFI_ENDPROC cfi_ignore +#define CFI_DEF_CFA cfi_ignore #define CFI_DEF_CFA_REGISTER cfi_ignore #define CFI_DEF_CFA_OFFSET cfi_ignore #define CFI_ADJUST_CFA_OFFSET cfi_ignore -#define CFI_OFFSET cfi_ignore -#define CFI_REL_OFFSET cfi_ignore -#define CFI_REGISTER cfi_ignore -#define CFI_RESTORE cfi_ignore -#define CFI_REMEMBER_STATE cfi_ignore -#define CFI_RESTORE_STATE cfi_ignore -#define CFI_UNDEFINED cfi_ignore -#define CFI_SIGNAL_FRAME cfi_ignore +#define CFI_OFFSET cfi_ignore +#define CFI_REL_OFFSET cfi_ignore +#define CFI_REGISTER cfi_ignore +#define CFI_RESTORE cfi_ignore +#define CFI_REMEMBER_STATE cfi_ignore +#define CFI_RESTORE_STATE cfi_ignore +#define CFI_UNDEFINED cfi_ignore +#define CFI_SIGNAL_FRAME cfi_ignore #endif +/* + * An attempt to make CFI annotations more or less + * correct and shorter. It is implied that you know + * what you're doing if you use them. + */ +#ifdef __ASSEMBLY__ +#ifdef CONFIG_X86_64 + .macro pushq_cfi reg + pushq \reg + CFI_ADJUST_CFA_OFFSET 8 + .endm + + .macro popq_cfi reg + popq \reg + CFI_ADJUST_CFA_OFFSET -8 + .endm + + .macro movq_cfi reg offset=0 + movq %\reg, \offset(%rsp) + CFI_REL_OFFSET \reg, \offset + .endm + + .macro movq_cfi_restore offset reg + movq \offset(%rsp), %\reg + CFI_RESTORE \reg + .endm +#else /*!CONFIG_X86_64*/ + + /* 32bit defenitions are missed yet */ + +#endif /*!CONFIG_X86_64*/ +#endif /*__ASSEMBLY__*/ + #endif /* _ASM_X86_DWARF2_H */ diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e5ddf573ded..249eb604e71 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -60,29 +60,6 @@ #define __AUDIT_ARCH_LE 0x40000000 .code64 -/* - * Some macro's to hide the most frequently occuring CFI annotations. - */ - .macro pushq_cfi reg - pushq \reg - CFI_ADJUST_CFA_OFFSET 8 - .endm - - .macro popq_cfi reg - popq \reg - CFI_ADJUST_CFA_OFFSET -8 - .endm - - .macro movq_cfi reg offset=0 - movq %\reg, \offset(%rsp) - CFI_REL_OFFSET \reg, \offset - .endm - - .macro movq_cfi_restore offset reg - movq \offset(%rsp), %\reg - CFI_RESTORE \reg - .endm - #ifdef CONFIG_FUNCTION_TRACER #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(mcount) From 322648d1ba75280d62f114d47048beb0b35f5047 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sun, 23 Nov 2008 10:08:28 +0100 Subject: [PATCH 15/26] x86: include ENTRY/END in entry handlers in entry_64.S Impact: cleanup of entry_64.S Except for the order and the place of the functions, this patch should not change the generated code. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 261 ++++++++++++++++--------------------- 1 file changed, 110 insertions(+), 151 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 249eb604e71..1a856c0b21a 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -922,76 +922,70 @@ END(common_interrupt) /* * APIC interrupts. */ - .p2align 5 - - .macro apicinterrupt num,func +.macro apicinterrupt num sym do_sym +ENTRY(\sym) INTR_FRAME pushq $~(\num) CFI_ADJUST_CFA_OFFSET 8 - interrupt \func + interrupt \do_sym jmp ret_from_intr CFI_ENDPROC - .endm - -ENTRY(thermal_interrupt) - apicinterrupt THERMAL_APIC_VECTOR,smp_thermal_interrupt -END(thermal_interrupt) - -ENTRY(threshold_interrupt) - apicinterrupt THRESHOLD_APIC_VECTOR,mce_threshold_interrupt -END(threshold_interrupt) +END(\sym) +.endm #ifdef CONFIG_SMP -ENTRY(reschedule_interrupt) - apicinterrupt RESCHEDULE_VECTOR,smp_reschedule_interrupt -END(reschedule_interrupt) - - .macro INVALIDATE_ENTRY num -ENTRY(invalidate_interrupt\num) - apicinterrupt INVALIDATE_TLB_VECTOR_START+\num,smp_invalidate_interrupt -END(invalidate_interrupt\num) - .endm - - INVALIDATE_ENTRY 0 - INVALIDATE_ENTRY 1 - INVALIDATE_ENTRY 2 - INVALIDATE_ENTRY 3 - INVALIDATE_ENTRY 4 - INVALIDATE_ENTRY 5 - INVALIDATE_ENTRY 6 - INVALIDATE_ENTRY 7 - -ENTRY(call_function_interrupt) - apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt -END(call_function_interrupt) -ENTRY(call_function_single_interrupt) - apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt -END(call_function_single_interrupt) -ENTRY(irq_move_cleanup_interrupt) - apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt -END(irq_move_cleanup_interrupt) +apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ + irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif -ENTRY(apic_timer_interrupt) - apicinterrupt LOCAL_TIMER_VECTOR,smp_apic_timer_interrupt -END(apic_timer_interrupt) +apicinterrupt 220 \ + uv_bau_message_intr1 uv_bau_message_interrupt +apicinterrupt LOCAL_TIMER_VECTOR \ + apic_timer_interrupt smp_apic_timer_interrupt -ENTRY(uv_bau_message_intr1) - apicinterrupt 220,uv_bau_message_interrupt -END(uv_bau_message_intr1) +#ifdef CONFIG_SMP +apicinterrupt INVALIDATE_TLB_VECTOR_START+0 \ + invalidate_interrupt0 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+1 \ + invalidate_interrupt1 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+2 \ + invalidate_interrupt2 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+3 \ + invalidate_interrupt3 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+4 \ + invalidate_interrupt4 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+5 \ + invalidate_interrupt5 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+6 \ + invalidate_interrupt6 smp_invalidate_interrupt +apicinterrupt INVALIDATE_TLB_VECTOR_START+7 \ + invalidate_interrupt7 smp_invalidate_interrupt +#endif -ENTRY(error_interrupt) - apicinterrupt ERROR_APIC_VECTOR,smp_error_interrupt -END(error_interrupt) +apicinterrupt THRESHOLD_APIC_VECTOR \ + threshold_interrupt mce_threshold_interrupt +apicinterrupt THERMAL_APIC_VECTOR \ + thermal_interrupt smp_thermal_interrupt -ENTRY(spurious_interrupt) - apicinterrupt SPURIOUS_APIC_VECTOR,smp_spurious_interrupt -END(spurious_interrupt) +#ifdef CONFIG_SMP +apicinterrupt CALL_FUNCTION_SINGLE_VECTOR \ + call_function_single_interrupt smp_call_function_single_interrupt +apicinterrupt CALL_FUNCTION_VECTOR \ + call_function_interrupt smp_call_function_interrupt +apicinterrupt RESCHEDULE_VECTOR \ + reschedule_interrupt smp_reschedule_interrupt +#endif + +apicinterrupt ERROR_APIC_VECTOR \ + error_interrupt smp_error_interrupt +apicinterrupt SPURIOUS_APIC_VECTOR \ + spurious_interrupt smp_spurious_interrupt /* * Exception entry points. */ - .macro zeroentry sym +.macro zeroentry sym do_sym +ENTRY(\sym) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ @@ -1001,12 +995,14 @@ END(spurious_interrupt) DEFAULT_FRAME 0 movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - call \sym + call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC - .endm +END(\sym) +.endm - .macro paranoidzeroentry sym +.macro paranoidzeroentry sym do_sym +KPROBE_ENTRY(\sym) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -1016,12 +1012,14 @@ END(spurious_interrupt) TRACE_IRQS_OFF movq %rsp,%rdi /* pt_regs pointer */ xorl %esi,%esi /* no error code */ - call \sym + call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC - .endm +KPROBE_END(\sym) +.endm - .macro paranoidzeroentry_ist sym ist +.macro paranoidzeroentry_ist sym do_sym ist +KPROBE_ENTRY(\sym) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -1033,13 +1031,19 @@ END(spurious_interrupt) xorl %esi,%esi /* no error code */ movq %gs:pda_data_offset, %rbp subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) - call \sym + call \do_sym addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC - .endm +KPROBE_END(\sym) +.endm - .macro errorentry sym +.macro errorentry sym do_sym entry=0 +.if \entry +KPROBE_ENTRY(\sym) +.else +ENTRY(\sym) +.endif XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $15*8,%rsp @@ -1049,13 +1053,23 @@ END(spurious_interrupt) movq %rsp,%rdi /* pt_regs pointer */ movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ - call \sym + call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC - .endm +.if \entry +KPROBE_END(\sym) +.else +END(\sym) +.endif +.endm /* error code is on the stack already */ - .macro paranoiderrorentry sym +.macro paranoiderrorentry sym do_sym entry=1 +.if \entry +KPROBE_ENTRY(\sym) +.else +ENTRY(\sym) +.endif XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $15*8,%rsp @@ -1066,10 +1080,37 @@ END(spurious_interrupt) movq %rsp,%rdi /* pt_regs pointer */ movq ORIG_RAX(%rsp),%rsi /* get error code */ movq $-1,ORIG_RAX(%rsp) /* no syscall to restart */ - call \sym + call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC - .endm +.if \entry +KPROBE_END(\sym) +.else +END(\sym) +.endif +.endm + +zeroentry divide_error do_divide_error +paranoidzeroentry_ist debug do_debug DEBUG_STACK +paranoidzeroentry_ist int3 do_int3 DEBUG_STACK +zeroentry overflow do_overflow +zeroentry bounds do_bounds +zeroentry invalid_op do_invalid_op +zeroentry device_not_available do_device_not_available +paranoiderrorentry double_fault do_double_fault 0 +zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun +errorentry invalid_TSS do_invalid_TSS +errorentry segment_not_present do_segment_not_present +paranoiderrorentry stack_segment do_stack_segment +errorentry general_protection do_general_protection 1 +errorentry page_fault do_page_fault 1 +zeroentry spurious_interrupt_bug do_spurious_interrupt_bug +zeroentry coprocessor_error do_coprocessor_error +errorentry alignment_check do_alignment_check +#ifdef CONFIG_X86_MCE +paranoidzeroentry machine_check do_machine_check +#endif +zeroentry simd_coprocessor_error do_simd_coprocessor_error /* * "Paranoid" exit path from exception stack. @@ -1321,26 +1362,7 @@ ENTRY(kernel_execve) CFI_ENDPROC ENDPROC(kernel_execve) -KPROBE_ENTRY(page_fault) - errorentry do_page_fault -KPROBE_END(page_fault) -ENTRY(coprocessor_error) - zeroentry do_coprocessor_error -END(coprocessor_error) - -ENTRY(simd_coprocessor_error) - zeroentry do_simd_coprocessor_error -END(simd_coprocessor_error) - -ENTRY(device_not_available) - zeroentry do_device_not_available -END(device_not_available) - - /* runs on exception stack */ -KPROBE_ENTRY(debug) - paranoidzeroentry_ist do_debug, DEBUG_STACK -KPROBE_END(debug) /* runs on exception stack */ KPROBE_ENTRY(nmi) @@ -1397,67 +1419,6 @@ nmi_schedule: #endif KPROBE_END(nmi) -KPROBE_ENTRY(int3) - paranoidzeroentry_ist do_int3, DEBUG_STACK -KPROBE_END(int3) - -ENTRY(overflow) - zeroentry do_overflow -END(overflow) - -ENTRY(bounds) - zeroentry do_bounds -END(bounds) - -ENTRY(invalid_op) - zeroentry do_invalid_op -END(invalid_op) - -ENTRY(coprocessor_segment_overrun) - zeroentry do_coprocessor_segment_overrun -END(coprocessor_segment_overrun) - - /* runs on exception stack */ -ENTRY(double_fault) - paranoiderrorentry do_double_fault -END(double_fault) - -ENTRY(invalid_TSS) - errorentry do_invalid_TSS -END(invalid_TSS) - -ENTRY(segment_not_present) - errorentry do_segment_not_present -END(segment_not_present) - - /* runs on exception stack */ -ENTRY(stack_segment) - paranoiderrorentry do_stack_segment -END(stack_segment) - -KPROBE_ENTRY(general_protection) - errorentry do_general_protection -KPROBE_END(general_protection) - -ENTRY(alignment_check) - errorentry do_alignment_check -END(alignment_check) - -ENTRY(divide_error) - zeroentry do_divide_error -END(divide_error) - -ENTRY(spurious_interrupt_bug) - zeroentry do_spurious_interrupt_bug -END(spurious_interrupt_bug) - -#ifdef CONFIG_X86_MCE - /* runs on exception stack */ -ENTRY(machine_check) - paranoidzeroentry do_machine_check -END(machine_check) -#endif - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1486,9 +1447,7 @@ KPROBE_ENTRY(ignore_sysret) ENDPROC(ignore_sysret) #ifdef CONFIG_XEN -ENTRY(xen_hypervisor_callback) - zeroentry xen_do_hypervisor_callback -END(xen_hypervisor_callback) +zeroentry xen_hypervisor_callback xen_do_hypervisor_callback /* # A note on the "critical region" in our callback handler. From 6efdcfaf16cc4fc76651603e083cf3ec4bd1e6de Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Sun, 23 Nov 2008 10:15:32 +0100 Subject: [PATCH 16/26] x86: KPROBE_ENTRY should be paired wth KPROBE_END Impact: move some code out of .kprobes.text KPROBE_ENTRY switches code generation to .kprobes.text, and KPROBE_END uses .popsection to get back to the previous section (.text, normally). Also replace ENDPROC by END, for consistency. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1a856c0b21a..f2d546e1635 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1167,7 +1167,7 @@ paranoid_schedule: TRACE_IRQS_OFF jmp paranoid_userspace CFI_ENDPROC -END(paranoid_exit) +KPROBE_END(paranoid_exit) /* * Exception entry point. This expects an error code/orig_rax on the stack. @@ -1259,7 +1259,7 @@ gs_change: CFI_ADJUST_CFA_OFFSET -8 ret CFI_ENDPROC -ENDPROC(native_load_gs_index) +END(native_load_gs_index) .section __ex_table,"a" .align 8 @@ -1313,7 +1313,7 @@ ENTRY(kernel_thread) UNFAKE_STACK_FRAME ret CFI_ENDPROC -ENDPROC(kernel_thread) +END(kernel_thread) child_rip: pushq $0 # fake return address @@ -1329,7 +1329,7 @@ child_rip: mov %eax, %edi call do_exit CFI_ENDPROC -ENDPROC(child_rip) +END(child_rip) /* * execve(). This function needs to use IRET, not SYSRET, to set up all state properly. @@ -1360,9 +1360,7 @@ ENTRY(kernel_execve) UNFAKE_STACK_FRAME ret CFI_ENDPROC -ENDPROC(kernel_execve) - - +END(kernel_execve) /* runs on exception stack */ KPROBE_ENTRY(nmi) @@ -1437,14 +1435,14 @@ ENTRY(call_softirq) decl %gs:pda_irqcount ret CFI_ENDPROC -ENDPROC(call_softirq) +END(call_softirq) KPROBE_ENTRY(ignore_sysret) CFI_STARTPROC mov $-ENOSYS,%eax sysret CFI_ENDPROC -ENDPROC(ignore_sysret) +KPROBE_END(ignore_sysret) #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback From 3b6c52b5b634ae41d762cb174465272d69198160 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sun, 23 Nov 2008 20:21:39 +0300 Subject: [PATCH 17/26] x86: introduce ENTRY(KPROBE_ENTRY)_X86 assembly helpers to catch unbalanced declaration v3 Impact: make ENTRY()/END() macros more capable It's usefull to catch unbalanced or messed or mixed declarations of ENTRY and KPROBES. These macros would help a bit. For example the following code would compile without problems ENTRY_X86(mcount) retq END_X86(mcount) But if you forget and mess the following form ENTRY_X86(mcount) retq END(mcount) ENTRY_X86(ftrace_caller) The assembler will issue the following message: Error: ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed Actually the checking is performed at every _X86 macro so maybe it's good idea to put ENTRY_KPROBE_FINAL_X86 at the end of .S file to be sure you didn't miss anything. Signed-off-by: Cyrill Gorcunov Cc: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/include/asm/linkage.h | 60 ++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/arch/x86/include/asm/linkage.h b/arch/x86/include/asm/linkage.h index f61ee8f937e..5d98d0b68ff 100644 --- a/arch/x86/include/asm/linkage.h +++ b/arch/x86/include/asm/linkage.h @@ -57,5 +57,65 @@ #define __ALIGN_STR ".align 16,0x90" #endif +/* + * to check ENTRY_X86/END_X86 and + * KPROBE_ENTRY_X86/KPROBE_END_X86 + * unbalanced-missed-mixed appearance + */ +#define __set_entry_x86 .set ENTRY_X86_IN, 0 +#define __unset_entry_x86 .set ENTRY_X86_IN, 1 +#define __set_kprobe_x86 .set KPROBE_X86_IN, 0 +#define __unset_kprobe_x86 .set KPROBE_X86_IN, 1 + +#define __macro_err_x86 .error "ENTRY_X86/KPROBE_X86 unbalanced,missed,mixed" + +#define __check_entry_x86 \ + .ifdef ENTRY_X86_IN; \ + .ifeq ENTRY_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_kprobe_x86 \ + .ifdef KPROBE_X86_IN; \ + .ifeq KPROBE_X86_IN; \ + __macro_err_x86; \ + .abort; \ + .endif; \ + .endif + +#define __check_entry_kprobe_x86 \ + __check_entry_x86; \ + __check_kprobe_x86 + +#define ENTRY_KPROBE_FINAL_X86 __check_entry_kprobe_x86 + +#define ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_entry_x86; \ + .globl name; \ + __ALIGN; \ + name: + +#define END_X86(name) \ + __unset_entry_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name + +#define KPROBE_ENTRY_X86(name) \ + __check_entry_kprobe_x86; \ + __set_kprobe_x86; \ + .pushsection .kprobes.text, "ax"; \ + .globl name; \ + __ALIGN; \ + name: + +#define KPROBE_END_X86(name) \ + __unset_kprobe_x86; \ + __check_entry_kprobe_x86; \ + .size name, .-name; \ + .popsection + #endif /* _ASM_X86_LINKAGE_H */ From ddeb8f2149de280d54f0c8910cead42e6042b2cb Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 24 Nov 2008 13:24:28 +0100 Subject: [PATCH 18/26] x86_64: get rid of the use of KPROBE_ENTRY / KPROBE_END Impact: clean up assembly macros and annotations - with some object impact entry_64.S is the only user of KPROBE_ENTRY / KPROBE_END on x86_64. This patch reorders entry_64.S and explicitly generates a separate section for functions that need the protection. The generated code before and after the patch is equal. Implicitly changing sections in assembly files makes it more difficult to follow why the assembler is doing certain things. For example, .p2align 5 KPROBE_ENTRY(...) was not doing what you would expect. Other section changes (__ex_table, .fixup, .init.rodata) are done explicitly already. Signed-off-by: Alexander van Heukelum Acked-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 444 ++++++++++++++++++------------------- 1 file changed, 220 insertions(+), 224 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index f2d546e1635..38fcd0517c3 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1002,7 +1002,7 @@ END(\sym) .endm .macro paranoidzeroentry sym do_sym -KPROBE_ENTRY(\sym) +ENTRY(\sym) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -1015,11 +1015,11 @@ KPROBE_ENTRY(\sym) call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -KPROBE_END(\sym) +END(\sym) .endm .macro paranoidzeroentry_ist sym do_sym ist -KPROBE_ENTRY(\sym) +ENTRY(\sym) INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 /* ORIG_RAX: no syscall to restart */ @@ -1035,15 +1035,11 @@ KPROBE_ENTRY(\sym) addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -KPROBE_END(\sym) +END(\sym) .endm -.macro errorentry sym do_sym entry=0 -.if \entry -KPROBE_ENTRY(\sym) -.else +.macro errorentry sym do_sym ENTRY(\sym) -.endif XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $15*8,%rsp @@ -1056,20 +1052,12 @@ ENTRY(\sym) call \do_sym jmp error_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -.if \entry -KPROBE_END(\sym) -.else END(\sym) -.endif .endm /* error code is on the stack already */ -.macro paranoiderrorentry sym do_sym entry=1 -.if \entry -KPROBE_ENTRY(\sym) -.else +.macro paranoiderrorentry sym do_sym ENTRY(\sym) -.endif XCPT_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME subq $15*8,%rsp @@ -1083,166 +1071,23 @@ ENTRY(\sym) call \do_sym jmp paranoid_exit /* %ebx: no swapgs flag */ CFI_ENDPROC -.if \entry -KPROBE_END(\sym) -.else END(\sym) -.endif .endm zeroentry divide_error do_divide_error -paranoidzeroentry_ist debug do_debug DEBUG_STACK -paranoidzeroentry_ist int3 do_int3 DEBUG_STACK zeroentry overflow do_overflow zeroentry bounds do_bounds zeroentry invalid_op do_invalid_op zeroentry device_not_available do_device_not_available -paranoiderrorentry double_fault do_double_fault 0 +paranoiderrorentry double_fault do_double_fault zeroentry coprocessor_segment_overrun do_coprocessor_segment_overrun errorentry invalid_TSS do_invalid_TSS errorentry segment_not_present do_segment_not_present -paranoiderrorentry stack_segment do_stack_segment -errorentry general_protection do_general_protection 1 -errorentry page_fault do_page_fault 1 zeroentry spurious_interrupt_bug do_spurious_interrupt_bug zeroentry coprocessor_error do_coprocessor_error errorentry alignment_check do_alignment_check -#ifdef CONFIG_X86_MCE -paranoidzeroentry machine_check do_machine_check -#endif zeroentry simd_coprocessor_error do_simd_coprocessor_error - /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take - * any kernel state for granted. - * We don't do kernel preemption checks here, because only - * NMI should be common and it does not enable IRQs and - * cannot get reschedule ticks. - * - * "trace" is 0 for the NMI handler only, because irq-tracing - * is fundamentally NMI-unsafe. (we cannot change the soft and - * hard flags at once, atomically) - */ - - /* ebx: no swapgs flag */ -KPROBE_ENTRY(paranoid_exit) - INTR_FRAME - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - testl %ebx,%ebx /* swapgs needed? */ - jnz paranoid_restore - testl $3,CS(%rsp) - jnz paranoid_userspace -paranoid_swapgs: - TRACE_IRQS_IRETQ 0 - SWAPGS_UNSAFE_STACK -paranoid_restore: - RESTORE_ALL 8 - jmp irq_return -paranoid_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz paranoid_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz paranoid_schedule - movl %ebx,%edx /* arg3: thread flags */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - jmp paranoid_userspace -paranoid_schedule: - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY) - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) - TRACE_IRQS_OFF - jmp paranoid_userspace - CFI_ENDPROC -KPROBE_END(paranoid_exit) - -/* - * Exception entry point. This expects an error code/orig_rax on the stack. - * returns in "no swapgs flag" in %ebx. - */ -KPROBE_ENTRY(error_entry) - XCPT_FRAME - CFI_ADJUST_CFA_OFFSET 15*8 - /* oldrax contains error code */ - cld - movq_cfi rdi, RDI+8 - movq_cfi rsi, RSI+8 - movq_cfi rdx, RDX+8 - movq_cfi rcx, RCX+8 - movq_cfi rax, RAX+8 - movq_cfi r8, R8+8 - movq_cfi r9, R9+8 - movq_cfi r10, R10+8 - movq_cfi r11, R11+8 - movq_cfi rbx, RBX+8 - movq_cfi rbp, RBP+8 - movq_cfi r12, R12+8 - movq_cfi r13, R13+8 - movq_cfi r14, R14+8 - movq_cfi r15, R15+8 - xorl %ebx,%ebx - testl $3,CS+8(%rsp) - je error_kernelspace -error_swapgs: - SWAPGS -error_sti: - TRACE_IRQS_OFF - ret - CFI_ENDPROC - -/* - * There are two places in the kernel that can potentially fault with - * usergs. Handle them here. The exception handlers after iret run with - * kernel gs again, so don't set the user space flag. B stepping K8s - * sometimes report an truncated RIP for IRET exceptions returning to - * compat mode. Check for these here too. - */ -error_kernelspace: - incl %ebx - leaq irq_return(%rip),%rcx - cmpq %rcx,RIP+8(%rsp) - je error_swapgs - movl %ecx,%ecx /* zero extend */ - cmpq %rcx,RIP+8(%rsp) - je error_swapgs - cmpq $gs_change,RIP+8(%rsp) - je error_swapgs - jmp error_sti -KPROBE_END(error_entry) - - -/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ -KPROBE_ENTRY(error_exit) - DEFAULT_FRAME - movl %ebx,%eax - RESTORE_REST - DISABLE_INTERRUPTS(CLBR_NONE) - TRACE_IRQS_OFF - GET_THREAD_INFO(%rcx) - testl %eax,%eax - jne retint_kernel - LOCKDEP_SYS_EXIT_IRQ - movl TI_flags(%rcx),%edx - movl $_TIF_WORK_MASK,%edi - andl %edi,%edx - jnz retint_careful - jmp retint_swapgs - CFI_ENDPROC -KPROBE_END(error_exit) - /* Reload gs selector with exception handling */ /* edi: new selector */ ENTRY(native_load_gs_index) @@ -1362,61 +1207,6 @@ ENTRY(kernel_execve) CFI_ENDPROC END(kernel_execve) - /* runs on exception stack */ -KPROBE_ENTRY(nmi) - INTR_FRAME - PARAVIRT_ADJUST_EXCEPTION_FRAME - pushq_cfi $-1 - subq $15*8, %rsp - CFI_ADJUST_CFA_OFFSET 15*8 - call save_paranoid - DEFAULT_FRAME 0 - /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ - movq %rsp,%rdi - movq $-1,%rsi - call do_nmi -#ifdef CONFIG_TRACE_IRQFLAGS - /* paranoidexit; without TRACE_IRQS_OFF */ - /* ebx: no swapgs flag */ - DISABLE_INTERRUPTS(CLBR_NONE) - testl %ebx,%ebx /* swapgs needed? */ - jnz nmi_restore - testl $3,CS(%rsp) - jnz nmi_userspace -nmi_swapgs: - SWAPGS_UNSAFE_STACK -nmi_restore: - RESTORE_ALL 8 - jmp irq_return -nmi_userspace: - GET_THREAD_INFO(%rcx) - movl TI_flags(%rcx),%ebx - andl $_TIF_WORK_MASK,%ebx - jz nmi_swapgs - movq %rsp,%rdi /* &pt_regs */ - call sync_regs - movq %rax,%rsp /* switch stack for scheduling */ - testl $_TIF_NEED_RESCHED,%ebx - jnz nmi_schedule - movl %ebx,%edx /* arg3: thread flags */ - ENABLE_INTERRUPTS(CLBR_NONE) - xorl %esi,%esi /* arg2: oldset */ - movq %rsp,%rdi /* arg1: &pt_regs */ - call do_notify_resume - DISABLE_INTERRUPTS(CLBR_NONE) - jmp nmi_userspace -nmi_schedule: - ENABLE_INTERRUPTS(CLBR_ANY) - call schedule - DISABLE_INTERRUPTS(CLBR_ANY) - jmp nmi_userspace - CFI_ENDPROC -#else - jmp paranoid_exit - CFI_ENDPROC -#endif -KPROBE_END(nmi) - /* Call softirq on interrupt stack. Interrupts are off. */ ENTRY(call_softirq) CFI_STARTPROC @@ -1437,13 +1227,6 @@ ENTRY(call_softirq) CFI_ENDPROC END(call_softirq) -KPROBE_ENTRY(ignore_sysret) - CFI_STARTPROC - mov $-ENOSYS,%eax - sysret - CFI_ENDPROC -KPROBE_END(ignore_sysret) - #ifdef CONFIG_XEN zeroentry xen_hypervisor_callback xen_do_hypervisor_callback @@ -1540,3 +1323,216 @@ ENTRY(xen_failsafe_callback) END(xen_failsafe_callback) #endif /* CONFIG_XEN */ + +/* + * Some functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" + +paranoidzeroentry_ist debug do_debug DEBUG_STACK +paranoidzeroentry_ist int3 do_int3 DEBUG_STACK +paranoiderrorentry stack_segment do_stack_segment +errorentry general_protection do_general_protection +errorentry page_fault do_page_fault +#ifdef CONFIG_X86_MCE +paranoidzeroentry machine_check do_machine_check +#endif + + /* + * "Paranoid" exit path from exception stack. + * Paranoid because this is used by NMIs and cannot take + * any kernel state for granted. + * We don't do kernel preemption checks here, because only + * NMI should be common and it does not enable IRQs and + * cannot get reschedule ticks. + * + * "trace" is 0 for the NMI handler only, because irq-tracing + * is fundamentally NMI-unsafe. (we cannot change the soft and + * hard flags at once, atomically) + */ + + /* ebx: no swapgs flag */ +ENTRY(paranoid_exit) + INTR_FRAME + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + testl %ebx,%ebx /* swapgs needed? */ + jnz paranoid_restore + testl $3,CS(%rsp) + jnz paranoid_userspace +paranoid_swapgs: + TRACE_IRQS_IRETQ 0 + SWAPGS_UNSAFE_STACK +paranoid_restore: + RESTORE_ALL 8 + jmp irq_return +paranoid_userspace: + GET_THREAD_INFO(%rcx) + movl TI_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz paranoid_swapgs + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz paranoid_schedule + movl %ebx,%edx /* arg3: thread flags */ + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_NONE) + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + jmp paranoid_userspace +paranoid_schedule: + TRACE_IRQS_ON + ENABLE_INTERRUPTS(CLBR_ANY) + call schedule + DISABLE_INTERRUPTS(CLBR_ANY) + TRACE_IRQS_OFF + jmp paranoid_userspace + CFI_ENDPROC +END(paranoid_exit) + +/* + * Exception entry point. This expects an error code/orig_rax on the stack. + * returns in "no swapgs flag" in %ebx. + */ +ENTRY(error_entry) + XCPT_FRAME + CFI_ADJUST_CFA_OFFSET 15*8 + /* oldrax contains error code */ + cld + movq_cfi rdi, RDI+8 + movq_cfi rsi, RSI+8 + movq_cfi rdx, RDX+8 + movq_cfi rcx, RCX+8 + movq_cfi rax, RAX+8 + movq_cfi r8, R8+8 + movq_cfi r9, R9+8 + movq_cfi r10, R10+8 + movq_cfi r11, R11+8 + movq_cfi rbx, RBX+8 + movq_cfi rbp, RBP+8 + movq_cfi r12, R12+8 + movq_cfi r13, R13+8 + movq_cfi r14, R14+8 + movq_cfi r15, R15+8 + xorl %ebx,%ebx + testl $3,CS+8(%rsp) + je error_kernelspace +error_swapgs: + SWAPGS +error_sti: + TRACE_IRQS_OFF + ret + CFI_ENDPROC + +/* + * There are two places in the kernel that can potentially fault with + * usergs. Handle them here. The exception handlers after iret run with + * kernel gs again, so don't set the user space flag. B stepping K8s + * sometimes report an truncated RIP for IRET exceptions returning to + * compat mode. Check for these here too. + */ +error_kernelspace: + incl %ebx + leaq irq_return(%rip),%rcx + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + movl %ecx,%ecx /* zero extend */ + cmpq %rcx,RIP+8(%rsp) + je error_swapgs + cmpq $gs_change,RIP+8(%rsp) + je error_swapgs + jmp error_sti +END(error_entry) + + +/* ebx: no swapgs flag (1: don't need swapgs, 0: need it) */ +ENTRY(error_exit) + DEFAULT_FRAME + movl %ebx,%eax + RESTORE_REST + DISABLE_INTERRUPTS(CLBR_NONE) + TRACE_IRQS_OFF + GET_THREAD_INFO(%rcx) + testl %eax,%eax + jne retint_kernel + LOCKDEP_SYS_EXIT_IRQ + movl TI_flags(%rcx),%edx + movl $_TIF_WORK_MASK,%edi + andl %edi,%edx + jnz retint_careful + jmp retint_swapgs + CFI_ENDPROC +END(error_exit) + + + /* runs on exception stack */ +ENTRY(nmi) + INTR_FRAME + PARAVIRT_ADJUST_EXCEPTION_FRAME + pushq_cfi $-1 + subq $15*8, %rsp + CFI_ADJUST_CFA_OFFSET 15*8 + call save_paranoid + DEFAULT_FRAME 0 + /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */ + movq %rsp,%rdi + movq $-1,%rsi + call do_nmi +#ifdef CONFIG_TRACE_IRQFLAGS + /* paranoidexit; without TRACE_IRQS_OFF */ + /* ebx: no swapgs flag */ + DISABLE_INTERRUPTS(CLBR_NONE) + testl %ebx,%ebx /* swapgs needed? */ + jnz nmi_restore + testl $3,CS(%rsp) + jnz nmi_userspace +nmi_swapgs: + SWAPGS_UNSAFE_STACK +nmi_restore: + RESTORE_ALL 8 + jmp irq_return +nmi_userspace: + GET_THREAD_INFO(%rcx) + movl TI_flags(%rcx),%ebx + andl $_TIF_WORK_MASK,%ebx + jz nmi_swapgs + movq %rsp,%rdi /* &pt_regs */ + call sync_regs + movq %rax,%rsp /* switch stack for scheduling */ + testl $_TIF_NEED_RESCHED,%ebx + jnz nmi_schedule + movl %ebx,%edx /* arg3: thread flags */ + ENABLE_INTERRUPTS(CLBR_NONE) + xorl %esi,%esi /* arg2: oldset */ + movq %rsp,%rdi /* arg1: &pt_regs */ + call do_notify_resume + DISABLE_INTERRUPTS(CLBR_NONE) + jmp nmi_userspace +nmi_schedule: + ENABLE_INTERRUPTS(CLBR_ANY) + call schedule + DISABLE_INTERRUPTS(CLBR_ANY) + jmp nmi_userspace + CFI_ENDPROC +#else + jmp paranoid_exit + CFI_ENDPROC +#endif +END(nmi) + +ENTRY(ignore_sysret) + CFI_STARTPROC + mov $-ENOSYS,%eax + sysret + CFI_ENDPROC +END(ignore_sysret) + +/* + * End of kprobes section + */ + .popsection From d211af055d0c12dc3416c2886e6fbdc6eb74a381 Mon Sep 17 00:00:00 2001 From: Alexander van Heukelum Date: Mon, 24 Nov 2008 15:38:45 +0100 Subject: [PATCH 19/26] i386: get rid of the use of KPROBE_ENTRY / KPROBE_END entry_32.S is now the only user of KPROBE_ENTRY / KPROBE_END, treewide. This patch reorders entry_64.S and explicitly generates a separate section for functions that need the protection. The generated code before and after the patch is equal. The KPROBE_ENTRY and KPROBE_END macro's are removed too. Signed-off-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_32.S | 438 +++++++++++++++++++------------------ include/linux/linkage.h | 8 - 2 files changed, 224 insertions(+), 222 deletions(-) diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index bd02ec77edc..6e96028d1a9 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -688,65 +688,6 @@ ENDPROC(name) /* The include is where all of the SMP etc. interrupts come from */ #include "entry_arch.h" -KPROBE_ENTRY(page_fault) - RING0_EC_FRAME - pushl $do_page_fault - CFI_ADJUST_CFA_OFFSET 4 - ALIGN -error_code: - /* the function address is in %fs's slot on the stack */ - pushl %es - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET es, 0*/ - pushl %ds - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET ds, 0*/ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET eax, 0 - pushl %ebp - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebp, 0 - pushl %edi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edi, 0 - pushl %esi - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET esi, 0 - pushl %edx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET edx, 0 - pushl %ecx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ecx, 0 - pushl %ebx - CFI_ADJUST_CFA_OFFSET 4 - CFI_REL_OFFSET ebx, 0 - cld - pushl %fs - CFI_ADJUST_CFA_OFFSET 4 - /*CFI_REL_OFFSET fs, 0*/ - movl $(__KERNEL_PERCPU), %ecx - movl %ecx, %fs - UNWIND_ESPFIX_STACK - popl %ecx - CFI_ADJUST_CFA_OFFSET -4 - /*CFI_REGISTER es, ecx*/ - movl PT_FS(%esp), %edi # get the function address - movl PT_ORIG_EAX(%esp), %edx # get the error code - movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart - mov %ecx, PT_FS(%esp) - /*CFI_REL_OFFSET fs, ES*/ - movl $(__USER_DS), %ecx - movl %ecx, %ds - movl %ecx, %es - TRACE_IRQS_OFF - movl %esp,%eax # pt_regs pointer - call *%edi - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(page_fault) - ENTRY(coprocessor_error) RING0_INT_FRAME pushl $0 @@ -777,140 +718,6 @@ ENTRY(device_not_available) CFI_ENDPROC END(device_not_available) -/* - * Debug traps and NMI can happen at the one SYSENTER instruction - * that sets up the real kernel stack. Check here, since we can't - * allow the wrong stack to be used. - * - * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have - * already pushed 3 words if it hits on the sysenter instruction: - * eflags, cs and eip. - * - * We just load the right stack, and push the three (known) values - * by hand onto the new stack - while updating the return eip past - * the instruction that would have done it for sysenter. - */ -#define FIX_STACK(offset, ok, label) \ - cmpw $__KERNEL_CS,4(%esp); \ - jne ok; \ -label: \ - movl TSS_sysenter_sp0+offset(%esp),%esp; \ - CFI_DEF_CFA esp, 0; \ - CFI_UNDEFINED eip; \ - pushfl; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $__KERNEL_CS; \ - CFI_ADJUST_CFA_OFFSET 4; \ - pushl $sysenter_past_esp; \ - CFI_ADJUST_CFA_OFFSET 4; \ - CFI_REL_OFFSET eip, 0 - -KPROBE_ENTRY(debug) - RING0_INT_FRAME - cmpl $ia32_sysenter_target,(%esp) - jne debug_stack_correct - FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) -debug_stack_correct: - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # error code 0 - movl %esp,%eax # pt_regs pointer - call do_debug - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(debug) - -/* - * NMI is doubly nasty. It can happen _while_ we're handling - * a debug fault, and the debug fault hasn't yet been able to - * clear up the stack. So we first check whether we got an - * NMI on the sysenter entry path, but after that we need to - * check whether we got an NMI on the debug path where the debug - * fault happened on the sysenter path. - */ -KPROBE_ENTRY(nmi) - RING0_INT_FRAME - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %ss, %eax - cmpw $__ESPFIX_SS, %ax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - je nmi_espfix_stack - cmpl $ia32_sysenter_target,(%esp) - je nmi_stack_fixup - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - movl %esp,%eax - /* Do not access memory above the end of our stack page, - * it might not exist. - */ - andl $(THREAD_SIZE-1),%eax - cmpl $(THREAD_SIZE-20),%eax - popl %eax - CFI_ADJUST_CFA_OFFSET -4 - jae nmi_stack_correct - cmpl $ia32_sysenter_target,12(%esp) - je nmi_debug_stack_check -nmi_stack_correct: - /* We have a RING0_INT_FRAME here */ - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_nmi - jmp restore_nocheck_notrace - CFI_ENDPROC - -nmi_stack_fixup: - RING0_INT_FRAME - FIX_STACK(12,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_debug_stack_check: - /* We have a RING0_INT_FRAME here */ - cmpw $__KERNEL_CS,16(%esp) - jne nmi_stack_correct - cmpl $debug,(%esp) - jb nmi_stack_correct - cmpl $debug_esp_fix_insn,(%esp) - ja nmi_stack_correct - FIX_STACK(24,nmi_stack_correct, 1) - jmp nmi_stack_correct - -nmi_espfix_stack: - /* We have a RING0_INT_FRAME here. - * - * create the pointer to lss back - */ - pushl %ss - CFI_ADJUST_CFA_OFFSET 4 - pushl %esp - CFI_ADJUST_CFA_OFFSET 4 - addw $4, (%esp) - /* copy the iret frame of 12 bytes */ - .rept 3 - pushl 16(%esp) - CFI_ADJUST_CFA_OFFSET 4 - .endr - pushl %eax - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - FIXUP_ESPFIX_STACK # %eax == %esp - xorl %edx,%edx # zero error code - call do_nmi - RESTORE_REGS - lss 12+4(%esp), %esp # back to espfix stack - CFI_ADJUST_CFA_OFFSET -24 - jmp irq_return - CFI_ENDPROC -KPROBE_END(nmi) - #ifdef CONFIG_PARAVIRT ENTRY(native_iret) iret @@ -926,19 +733,6 @@ ENTRY(native_irq_enable_sysexit) END(native_irq_enable_sysexit) #endif -KPROBE_ENTRY(int3) - RING0_INT_FRAME - pushl $-1 # mark this as an int - CFI_ADJUST_CFA_OFFSET 4 - SAVE_ALL - TRACE_IRQS_OFF - xorl %edx,%edx # zero error code - movl %esp,%eax # pt_regs pointer - call do_int3 - jmp ret_from_exception - CFI_ENDPROC -KPROBE_END(int3) - ENTRY(overflow) RING0_INT_FRAME pushl $0 @@ -1003,14 +797,6 @@ ENTRY(stack_segment) CFI_ENDPROC END(stack_segment) -KPROBE_ENTRY(general_protection) - RING0_EC_FRAME - pushl $do_general_protection - CFI_ADJUST_CFA_OFFSET 4 - jmp error_code - CFI_ENDPROC -KPROBE_END(general_protection) - ENTRY(alignment_check) RING0_EC_FRAME pushl $do_alignment_check @@ -1220,3 +1006,227 @@ END(mcount) #include "syscall_table_32.S" syscall_table_size=(.-sys_call_table) + +/* + * Some functions should be protected against kprobes + */ + .pushsection .kprobes.text, "ax" + +ENTRY(page_fault) + RING0_EC_FRAME + pushl $do_page_fault + CFI_ADJUST_CFA_OFFSET 4 + ALIGN +error_code: + /* the function address is in %fs's slot on the stack */ + pushl %es + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET es, 0*/ + pushl %ds + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET ds, 0*/ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET eax, 0 + pushl %ebp + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebp, 0 + pushl %edi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edi, 0 + pushl %esi + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET esi, 0 + pushl %edx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET edx, 0 + pushl %ecx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ecx, 0 + pushl %ebx + CFI_ADJUST_CFA_OFFSET 4 + CFI_REL_OFFSET ebx, 0 + cld + pushl %fs + CFI_ADJUST_CFA_OFFSET 4 + /*CFI_REL_OFFSET fs, 0*/ + movl $(__KERNEL_PERCPU), %ecx + movl %ecx, %fs + UNWIND_ESPFIX_STACK + popl %ecx + CFI_ADJUST_CFA_OFFSET -4 + /*CFI_REGISTER es, ecx*/ + movl PT_FS(%esp), %edi # get the function address + movl PT_ORIG_EAX(%esp), %edx # get the error code + movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart + mov %ecx, PT_FS(%esp) + /*CFI_REL_OFFSET fs, ES*/ + movl $(__USER_DS), %ecx + movl %ecx, %ds + movl %ecx, %es + TRACE_IRQS_OFF + movl %esp,%eax # pt_regs pointer + call *%edi + jmp ret_from_exception + CFI_ENDPROC +END(page_fault) + +/* + * Debug traps and NMI can happen at the one SYSENTER instruction + * that sets up the real kernel stack. Check here, since we can't + * allow the wrong stack to be used. + * + * "TSS_sysenter_sp0+12" is because the NMI/debug handler will have + * already pushed 3 words if it hits on the sysenter instruction: + * eflags, cs and eip. + * + * We just load the right stack, and push the three (known) values + * by hand onto the new stack - while updating the return eip past + * the instruction that would have done it for sysenter. + */ +#define FIX_STACK(offset, ok, label) \ + cmpw $__KERNEL_CS,4(%esp); \ + jne ok; \ +label: \ + movl TSS_sysenter_sp0+offset(%esp),%esp; \ + CFI_DEF_CFA esp, 0; \ + CFI_UNDEFINED eip; \ + pushfl; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $__KERNEL_CS; \ + CFI_ADJUST_CFA_OFFSET 4; \ + pushl $sysenter_past_esp; \ + CFI_ADJUST_CFA_OFFSET 4; \ + CFI_REL_OFFSET eip, 0 + +ENTRY(debug) + RING0_INT_FRAME + cmpl $ia32_sysenter_target,(%esp) + jne debug_stack_correct + FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) +debug_stack_correct: + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # error code 0 + movl %esp,%eax # pt_regs pointer + call do_debug + jmp ret_from_exception + CFI_ENDPROC +END(debug) + +/* + * NMI is doubly nasty. It can happen _while_ we're handling + * a debug fault, and the debug fault hasn't yet been able to + * clear up the stack. So we first check whether we got an + * NMI on the sysenter entry path, but after that we need to + * check whether we got an NMI on the debug path where the debug + * fault happened on the sysenter path. + */ +ENTRY(nmi) + RING0_INT_FRAME + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %ss, %eax + cmpw $__ESPFIX_SS, %ax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + je nmi_espfix_stack + cmpl $ia32_sysenter_target,(%esp) + je nmi_stack_fixup + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + movl %esp,%eax + /* Do not access memory above the end of our stack page, + * it might not exist. + */ + andl $(THREAD_SIZE-1),%eax + cmpl $(THREAD_SIZE-20),%eax + popl %eax + CFI_ADJUST_CFA_OFFSET -4 + jae nmi_stack_correct + cmpl $ia32_sysenter_target,12(%esp) + je nmi_debug_stack_check +nmi_stack_correct: + /* We have a RING0_INT_FRAME here */ + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_nmi + jmp restore_nocheck_notrace + CFI_ENDPROC + +nmi_stack_fixup: + RING0_INT_FRAME + FIX_STACK(12,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_debug_stack_check: + /* We have a RING0_INT_FRAME here */ + cmpw $__KERNEL_CS,16(%esp) + jne nmi_stack_correct + cmpl $debug,(%esp) + jb nmi_stack_correct + cmpl $debug_esp_fix_insn,(%esp) + ja nmi_stack_correct + FIX_STACK(24,nmi_stack_correct, 1) + jmp nmi_stack_correct + +nmi_espfix_stack: + /* We have a RING0_INT_FRAME here. + * + * create the pointer to lss back + */ + pushl %ss + CFI_ADJUST_CFA_OFFSET 4 + pushl %esp + CFI_ADJUST_CFA_OFFSET 4 + addw $4, (%esp) + /* copy the iret frame of 12 bytes */ + .rept 3 + pushl 16(%esp) + CFI_ADJUST_CFA_OFFSET 4 + .endr + pushl %eax + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + FIXUP_ESPFIX_STACK # %eax == %esp + xorl %edx,%edx # zero error code + call do_nmi + RESTORE_REGS + lss 12+4(%esp), %esp # back to espfix stack + CFI_ADJUST_CFA_OFFSET -24 + jmp irq_return + CFI_ENDPROC +END(nmi) + +ENTRY(int3) + RING0_INT_FRAME + pushl $-1 # mark this as an int + CFI_ADJUST_CFA_OFFSET 4 + SAVE_ALL + TRACE_IRQS_OFF + xorl %edx,%edx # zero error code + movl %esp,%eax # pt_regs pointer + call do_int3 + jmp ret_from_exception + CFI_ENDPROC +END(int3) + +ENTRY(general_protection) + RING0_EC_FRAME + pushl $do_general_protection + CFI_ADJUST_CFA_OFFSET 4 + jmp error_code + CFI_ENDPROC +END(general_protection) + +/* + * End of kprobes section + */ + .popsection diff --git a/include/linux/linkage.h b/include/linux/linkage.h index 9fd1f859021..fee9e59649c 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -64,14 +64,6 @@ name: #endif -#define KPROBE_ENTRY(name) \ - .pushsection .kprobes.text, "ax"; \ - ENTRY(name) - -#define KPROBE_END(name) \ - END(name); \ - .popsection - #ifndef END #define END(name) \ .size name, .-name From 33454539f386a2beb38269bea5fff82b3d56b0e9 Mon Sep 17 00:00:00 2001 From: "gorcunov@gmail.com" Date: Wed, 26 Nov 2008 22:17:02 +0300 Subject: [PATCH 20/26] x86: entry_64.S - use X86_EFLAGS_IF instead of hardcoded number Impact: cleanup Signed-off-by: Cyrill Gorcunov Acked-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 38fcd0517c3..1c309d54651 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -194,7 +194,7 @@ ENTRY(native_usergs_sysret64) pushq %rax /* rsp */ CFI_ADJUST_CFA_OFFSET 8 CFI_REL_OFFSET rsp,0 - pushq $(1<<9) /* eflags - interrupts on */ + pushq $X86_EFLAGS_IF /* eflags - interrupts on */ CFI_ADJUST_CFA_OFFSET 8 /*CFI_REL_OFFSET rflags,0*/ pushq $__KERNEL_CS /* cs */ From c2c631e318091118587f3b766347d259c9265b8b Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 26 Nov 2008 22:17:00 +0300 Subject: [PATCH 21/26] x86: entry_64.S - use ENTRY to define child_rip child_rip is called not by its name but indirectly rather so make it global and aligned. Signed-off-by: Cyrill Gorcunov Acked-by: Alexander van Heukelum Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 1c309d54651..0a910a7f85f 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1160,7 +1160,7 @@ ENTRY(kernel_thread) CFI_ENDPROC END(kernel_thread) -child_rip: +ENTRY(child_rip) pushq $0 # fake return address CFI_STARTPROC /* From 5ae3a139cf4fc2349f1dfa1993a66c1dcc119468 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 27 Nov 2008 00:02:10 +0300 Subject: [PATCH 22/26] x86: uv bau interrupt -- use proper interrupt number Signed-off-by: Cyrill Gorcunov Acked-by: Cliff Wickman Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 0a910a7f85f..57d7f7a5ad2 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -938,7 +938,7 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \ irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt #endif -apicinterrupt 220 \ +apicinterrupt UV_BAU_MESSAGE \ uv_bau_message_intr1 uv_bau_message_interrupt apicinterrupt LOCAL_TIMER_VECTOR \ apic_timer_interrupt smp_apic_timer_interrupt From 9f1e87ea3ecb3c46c21f6a1a202ec82f99ed2473 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 27 Nov 2008 21:10:08 +0300 Subject: [PATCH 23/26] x86: entry_64.S - trivial: space, comments fixup Impact: cleanup Signed-off-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar --- arch/x86/kernel/entry_64.S | 94 +++++++++++++++++++------------------- 1 file changed, 48 insertions(+), 46 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 57d7f7a5ad2..08c0c9777a0 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -1020,7 +1020,7 @@ END(\sym) .macro paranoidzeroentry_ist sym do_sym ist ENTRY(\sym) - INTR_FRAME + INTR_FRAME PARAVIRT_ADJUST_EXCEPTION_FRAME pushq $-1 /* ORIG_RAX: no syscall to restart */ CFI_ADJUST_CFA_OFFSET 8 @@ -1088,36 +1088,36 @@ zeroentry coprocessor_error do_coprocessor_error errorentry alignment_check do_alignment_check zeroentry simd_coprocessor_error do_simd_coprocessor_error - /* Reload gs selector with exception handling */ - /* edi: new selector */ + /* Reload gs selector with exception handling */ + /* edi: new selector */ ENTRY(native_load_gs_index) CFI_STARTPROC pushf CFI_ADJUST_CFA_OFFSET 8 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) - SWAPGS + SWAPGS gs_change: - movl %edi,%gs + movl %edi,%gs 2: mfence /* workaround */ SWAPGS - popf + popf CFI_ADJUST_CFA_OFFSET -8 - ret + ret CFI_ENDPROC END(native_load_gs_index) - .section __ex_table,"a" - .align 8 - .quad gs_change,bad_gs - .previous - .section .fixup,"ax" + .section __ex_table,"a" + .align 8 + .quad gs_change,bad_gs + .previous + .section .fixup,"ax" /* running with kernelgs */ bad_gs: SWAPGS /* switch back to user gs */ xorl %eax,%eax - movl %eax,%gs - jmp 2b - .previous + movl %eax,%gs + jmp 2b + .previous /* * Create a kernel thread. @@ -1152,7 +1152,7 @@ ENTRY(kernel_thread) * so internally to the x86_64 port you can rely on kernel_thread() * not to reschedule the child before returning, this avoids the need * of hacks for example to fork off the per-CPU idle tasks. - * [Hopefully no generic code relies on the reschedule -AK] + * [Hopefully no generic code relies on the reschedule -AK] */ RESTORE_ALL UNFAKE_STACK_FRAME @@ -1231,22 +1231,24 @@ END(call_softirq) zeroentry xen_hypervisor_callback xen_do_hypervisor_callback /* -# A note on the "critical region" in our callback handler. -# We want to avoid stacking callback handlers due to events occurring -# during handling of the last event. To do this, we keep events disabled -# until we've done all processing. HOWEVER, we must enable events before -# popping the stack frame (can't be done atomically) and so it would still -# be possible to get enough handler activations to overflow the stack. -# Although unlikely, bugs of that kind are hard to track down, so we'd -# like to avoid the possibility. -# So, on entry to the handler we detect whether we interrupted an -# existing activation in its critical region -- if so, we pop the current -# activation and restart the handler using the previous one. -*/ + * A note on the "critical region" in our callback handler. + * We want to avoid stacking callback handlers due to events occurring + * during handling of the last event. To do this, we keep events disabled + * until we've done all processing. HOWEVER, we must enable events before + * popping the stack frame (can't be done atomically) and so it would still + * be possible to get enough handler activations to overflow the stack. + * Although unlikely, bugs of that kind are hard to track down, so we'd + * like to avoid the possibility. + * So, on entry to the handler we detect whether we interrupted an + * existing activation in its critical region -- if so, we pop the current + * activation and restart the handler using the previous one. + */ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) CFI_STARTPROC -/* Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will - see the correct pointer to the pt_regs */ +/* + * Since we don't modify %rdi, evtchn_do_upall(struct *pt_regs) will + * see the correct pointer to the pt_regs + */ movq %rdi, %rsp # we don't return, adjust the stack frame CFI_ENDPROC DEFAULT_FRAME @@ -1264,18 +1266,18 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs) END(do_hypervisor_callback) /* -# Hypervisor uses this for application faults while it executes. -# We get here for two reasons: -# 1. Fault while reloading DS, ES, FS or GS -# 2. Fault while executing IRET -# Category 1 we do not need to fix up as Xen has already reloaded all segment -# registers that could be reloaded and zeroed the others. -# Category 2 we fix up by killing the current process. We cannot use the -# normal Linux return path in this case because if we use the IRET hypercall -# to pop the stack frame we end up in an infinite loop of failsafe callbacks. -# We distinguish between categories by comparing each saved segment register -# with its current contents: any discrepancy means we in category 1. -*/ + * Hypervisor uses this for application faults while it executes. + * We get here for two reasons: + * 1. Fault while reloading DS, ES, FS or GS + * 2. Fault while executing IRET + * Category 1 we do not need to fix up as Xen has already reloaded all segment + * registers that could be reloaded and zeroed the others. + * Category 2 we fix up by killing the current process. We cannot use the + * normal Linux return path in this case because if we use the IRET hypercall + * to pop the stack frame we end up in an infinite loop of failsafe callbacks. + * We distinguish between categories by comparing each saved segment register + * with its current contents: any discrepancy means we in category 1. + */ ENTRY(xen_failsafe_callback) INTR_FRAME 1 (6*8) /*CFI_REL_OFFSET gs,GS*/ @@ -1339,8 +1341,8 @@ paranoidzeroentry machine_check do_machine_check #endif /* - * "Paranoid" exit path from exception stack. - * Paranoid because this is used by NMIs and cannot take + * "Paranoid" exit path from exception stack. + * Paranoid because this is used by NMIs and cannot take * any kernel state for granted. * We don't do kernel preemption checks here, because only * NMI should be common and it does not enable IRQs and @@ -1445,7 +1447,7 @@ error_kernelspace: cmpq %rcx,RIP+8(%rsp) je error_swapgs cmpq $gs_change,RIP+8(%rsp) - je error_swapgs + je error_swapgs jmp error_sti END(error_entry) @@ -1521,7 +1523,7 @@ nmi_schedule: CFI_ENDPROC #else jmp paranoid_exit - CFI_ENDPROC + CFI_ENDPROC #endif END(nmi) From 5b3eec0c80038c8739ccd465b897a35c0dff1cc4 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 27 Nov 2008 14:41:21 +0100 Subject: [PATCH 24/26] x86: ret_from_fork - get rid of jump back Impact: remove dead code If we take a closer look at the rff_trace/rff_action ret_from_fork code, we have to realize that it does all the wrong things: for example it checks the TIF flag - while later on jumping back to the ret-from-syscall path - duplicating the check needlessly. But checking for _TIF_SYSCALL_TRACE is completely unnecessary here because we clear that flag for every freshly forked task. So the whole "tracing" code here, for which there is a out of line jump optimization that makes it even harder to read, is in reality completely dead code ... Reported-by: Cyrill Gorcunov Signed-off-by: Ingo Molnar Tested-by: Cyrill Gorcunov --- arch/x86/kernel/entry_64.S | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e41734a537b..3194636a429 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -361,34 +361,35 @@ ENTRY(save_paranoid) END(save_paranoid) /* - * A newly forked process directly context switches into this. + * A newly forked process directly context switches into this address. + * + * rdi: prev task we switched from */ -/* rdi: prev */ ENTRY(ret_from_fork) DEFAULT_FRAME + push kernel_eflags(%rip) CFI_ADJUST_CFA_OFFSET 8 - popf # reset kernel eflags + popf # reset kernel eflags CFI_ADJUST_CFA_OFFSET -8 - call schedule_tail + + call schedule_tail # rdi: 'prev' task parameter + GET_THREAD_INFO(%rcx) - testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx) + CFI_REMEMBER_STATE - jnz rff_trace -rff_action: RESTORE_REST - testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? + + testl $3, CS-ARGOFFSET(%rsp) # from kernel_thread? je int_ret_from_sys_call - testl $_TIF_IA32,TI_flags(%rcx) + + testl $_TIF_IA32, TI_flags(%rcx) # 32-bit compat task needs IRET jnz int_ret_from_sys_call + RESTORE_TOP_OF_STACK %rdi, -ARGOFFSET - jmp ret_from_sys_call + jmp ret_from_sys_call # go to the SYSRET fastpath + CFI_RESTORE_STATE -rff_trace: - movq %rsp,%rdi - call syscall_trace_leave - GET_THREAD_INFO(%rcx) - jmp rff_action CFI_ENDPROC END(ret_from_fork) From 915b0d0104b72fd36af088ba4b11b5690bc96a6c Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Mon, 8 Dec 2008 19:19:26 -0800 Subject: [PATCH 25/26] x86: hardirq: introduce inc_irq_stat() Impact: cleanup Introduce inc_irq_stat() macro and unify irq_stat accounting code. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/include/asm/hardirq_32.h | 2 ++ arch/x86/include/asm/hardirq_64.h | 2 ++ arch/x86/kernel/apic.c | 13 +++---------- arch/x86/kernel/smp.c | 18 +++--------------- arch/x86/kernel/traps.c | 6 +----- 5 files changed, 11 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h index 5ca135e72f2..cf7954d1405 100644 --- a/arch/x86/include/asm/hardirq_32.h +++ b/arch/x86/include/asm/hardirq_32.h @@ -22,6 +22,8 @@ DECLARE_PER_CPU(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT #define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) +#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++) + void ack_bad_irq(unsigned int irq); #include diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h index 1ba381fc51d..b5a6b5d5670 100644 --- a/arch/x86/include/asm/hardirq_64.h +++ b/arch/x86/include/asm/hardirq_64.h @@ -11,6 +11,8 @@ #define __ARCH_IRQ_STAT 1 +#define inc_irq_stat(member) add_pda(member, 1) + #define local_softirq_pending() read_pda(__softirq_pending) #define __ARCH_SET_SOFTIRQ_PENDING 1 diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c index 16f94879b52..1771dd74681 100644 --- a/arch/x86/kernel/apic.c +++ b/arch/x86/kernel/apic.c @@ -783,11 +783,7 @@ static void local_apic_timer_interrupt(void) /* * the NMI deadlock-detector uses this. */ -#ifdef CONFIG_X86_64 - add_pda(apic_timer_irqs, 1); -#else - per_cpu(irq_stat, cpu).apic_timer_irqs++; -#endif + inc_irq_stat(apic_timer_irqs); evt->event_handler(evt); } @@ -1695,14 +1691,11 @@ void smp_spurious_interrupt(struct pt_regs *regs) if (v & (1 << (SPURIOUS_APIC_VECTOR & 0x1f))) ack_APIC_irq(); -#ifdef CONFIG_X86_64 - add_pda(irq_spurious_count, 1); -#else + inc_irq_stat(irq_spurious_count); + /* see sw-dev-man vol 3, chapter 7.4.13.5 */ printk(KERN_INFO "spurious APIC interrupt on CPU#%d, " "should never happen.\n", smp_processor_id()); - __get_cpu_var(irq_stat).irq_spurious_count++; -#endif irq_exit(); } diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 18f9b19f5f8..d18537ce2c7 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -178,11 +178,7 @@ static void native_smp_send_stop(void) void smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_resched_count++; -#else - add_pda(irq_resched_count, 1); -#endif + inc_irq_stat(irq_resched_count); } void smp_call_function_interrupt(struct pt_regs *regs) @@ -190,11 +186,7 @@ void smp_call_function_interrupt(struct pt_regs *regs) ack_APIC_irq(); irq_enter(); generic_smp_call_function_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); } @@ -203,11 +195,7 @@ void smp_call_function_single_interrupt(struct pt_regs *regs) ack_APIC_irq(); irq_enter(); generic_smp_call_function_single_interrupt(); -#ifdef CONFIG_X86_32 - __get_cpu_var(irq_stat).irq_call_count++; -#else - add_pda(irq_call_count, 1); -#endif + inc_irq_stat(irq_call_count); irq_exit(); } diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 04d242ab016..d815293e6d9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -481,11 +481,7 @@ do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); -#ifdef CONFIG_X86_32 - { int cpu; cpu = smp_processor_id(); ++nmi_count(cpu); } -#else - add_pda(__nmi_count, 1); -#endif + inc_irq_stat(__nmi_count); if (!ignore_nmis) default_do_nmi(regs); From 8ae936690972dfcad73d0dde1095b9f32af5ee95 Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 12 Dec 2008 15:52:26 -0800 Subject: [PATCH 26/26] x86: hardirq: use inc_irq_stat() in non-unified functions Impact: cleanup Replace incrementing irq stat with inc_irq_stat() in non-unified functions. Signed-off-by: Hiroshi Shimamoto Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 2 +- arch/x86/kernel/time_32.c | 2 +- arch/x86/kernel/time_64.c | 2 +- arch/x86/kernel/tlb_32.c | 2 +- arch/x86/kernel/tlb_64.c | 2 +- 6 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c index 5eb390a4b2e..748c8f9e7a0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c @@ -237,7 +237,7 @@ asmlinkage void mce_threshold_interrupt(void) } } out: - add_pda(irq_threshold_count, 1); + inc_irq_stat(irq_threshold_count); irq_exit(); } diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c index c17eaf5dd6d..4b48f251fd3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c @@ -26,7 +26,7 @@ asmlinkage void smp_thermal_interrupt(void) if (therm_throt_process(msr_val & 1)) mce_log_therm_throt_event(smp_processor_id(), msr_val); - add_pda(irq_thermal_count, 1); + inc_irq_stat(irq_thermal_count); irq_exit(); } diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c index 77b400f06ea..65309e4cb1c 100644 --- a/arch/x86/kernel/time_32.c +++ b/arch/x86/kernel/time_32.c @@ -75,7 +75,7 @@ EXPORT_SYMBOL(profile_pc); irqreturn_t timer_interrupt(int irq, void *dev_id) { /* Keep nmi watchdog up to date */ - per_cpu(irq_stat, smp_processor_id()).irq0_irqs++; + inc_irq_stat(irq0_irqs); #ifdef CONFIG_X86_IO_APIC if (timer_ack) { diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c index 418a095c579..1749cacde8b 100644 --- a/arch/x86/kernel/time_64.c +++ b/arch/x86/kernel/time_64.c @@ -51,7 +51,7 @@ EXPORT_SYMBOL(profile_pc); irqreturn_t timer_interrupt(int irq, void *dev_id) { - add_pda(irq0_irqs, 1); + inc_irq_stat(irq0_irqs); global_clock_event->event_handler(global_clock_event); diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c index f4049f3513b..f374f83fca4 100644 --- a/arch/x86/kernel/tlb_32.c +++ b/arch/x86/kernel/tlb_32.c @@ -119,7 +119,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs) smp_mb__after_clear_bit(); out: put_cpu_no_resched(); - __get_cpu_var(irq_stat).irq_tlb_count++; + inc_irq_stat(irq_tlb_count); } void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c index 8f919ca6949..29887d7081a 100644 --- a/arch/x86/kernel/tlb_64.c +++ b/arch/x86/kernel/tlb_64.c @@ -154,7 +154,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) out: ack_APIC_irq(); cpu_clear(cpu, f->flush_cpumask); - add_pda(irq_tlb_count, 1); + inc_irq_stat(irq_tlb_count); } void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,