dect
/
linux-2.6
Archived
13
0
Fork 0

Merge branch 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86/asm changes from Ingo Molnar

* 'x86-asm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86: Include probe_roms.h in probe_roms.c
  x86/32: Print control and debug registers for kernel context
  x86: Tighten dependencies of CPU_SUP_*_32
  x86/numa: Improve internode cache alignment
  x86: Fix the NMI nesting comments
  x86-64: Improve insn scheduling in SAVE_ARGS_IRQ
  x86-64: Fix CFI annotations for NMI nesting code
  bitops: Add missing parentheses to new get_order macro
  bitops: Optimise get_order()
  bitops: Adjust the comment on get_order() to describe the size==0 case
  x86/spinlocks: Eliminate TICKET_MASK
  x86-64: Handle byte-wise tail copying in memcpy() without a loop
  x86-64: Fix memcpy() to support sizes of 4Gb and above
  x86-64: Fix memset() to support sizes of 4Gb and above
  x86-64: Slightly shorten copy_page()
This commit is contained in:
Linus Torvalds 2012-03-22 09:13:24 -07:00
commit e17fdf5c67
10 changed files with 128 additions and 98 deletions

View File

@ -303,7 +303,6 @@ config X86_GENERIC
config X86_INTERNODE_CACHE_SHIFT config X86_INTERNODE_CACHE_SHIFT
int int
default "12" if X86_VSMP default "12" if X86_VSMP
default "7" if NUMA
default X86_L1_CACHE_SHIFT default X86_L1_CACHE_SHIFT
config X86_CMPXCHG config X86_CMPXCHG
@ -441,7 +440,7 @@ config CPU_SUP_INTEL
config CPU_SUP_CYRIX_32 config CPU_SUP_CYRIX_32
default y default y
bool "Support Cyrix processors" if PROCESSOR_SELECT bool "Support Cyrix processors" if PROCESSOR_SELECT
depends on !64BIT depends on M386 || M486 || M586 || M586TSC || M586MMX || (EXPERT && !64BIT)
---help--- ---help---
This enables detection, tunings and quirks for Cyrix processors This enables detection, tunings and quirks for Cyrix processors
@ -495,7 +494,7 @@ config CPU_SUP_TRANSMETA_32
config CPU_SUP_UMC_32 config CPU_SUP_UMC_32
default y default y
bool "Support UMC processors" if PROCESSOR_SELECT bool "Support UMC processors" if PROCESSOR_SELECT
depends on !64BIT depends on M386 || M486 || (EXPERT && !64BIT)
---help--- ---help---
This enables detection, tunings and quirks for UMC processors This enables detection, tunings and quirks for UMC processors

View File

@ -88,14 +88,14 @@ static inline int __ticket_spin_is_locked(arch_spinlock_t *lock)
{ {
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
return !!(tmp.tail ^ tmp.head); return tmp.tail != tmp.head;
} }
static inline int __ticket_spin_is_contended(arch_spinlock_t *lock) static inline int __ticket_spin_is_contended(arch_spinlock_t *lock)
{ {
struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets); struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
return ((tmp.tail - tmp.head) & TICKET_MASK) > 1; return (__ticket_t)(tmp.tail - tmp.head) > 1;
} }
#ifndef CONFIG_PARAVIRT_SPINLOCKS #ifndef CONFIG_PARAVIRT_SPINLOCKS

View File

@ -16,7 +16,6 @@ typedef u32 __ticketpair_t;
#endif #endif
#define TICKET_SHIFT (sizeof(__ticket_t) * 8) #define TICKET_SHIFT (sizeof(__ticket_t) * 8)
#define TICKET_MASK ((__ticket_t)((1 << TICKET_SHIFT) - 1))
typedef struct arch_spinlock { typedef struct arch_spinlock {
union { union {

View File

@ -87,7 +87,7 @@ void show_registers(struct pt_regs *regs)
int i; int i;
print_modules(); print_modules();
__show_regs(regs, 0); __show_regs(regs, !user_mode_vm(regs));
printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n", printk(KERN_EMERG "Process %.*s (pid: %d, ti=%p task=%p task.ti=%p)\n",
TASK_COMM_LEN, current->comm, task_pid_nr(current), TASK_COMM_LEN, current->comm, task_pid_nr(current),

View File

@ -320,7 +320,7 @@ ENDPROC(native_usergs_sysret64)
movq %rsp, %rsi movq %rsp, %rsi
leaq -RBP(%rsp),%rdi /* arg1 for handler */ leaq -RBP(%rsp),%rdi /* arg1 for handler */
testl $3, CS(%rdi) testl $3, CS-RBP(%rsi)
je 1f je 1f
SWAPGS SWAPGS
/* /*
@ -330,11 +330,10 @@ ENDPROC(native_usergs_sysret64)
* moving irq_enter into assembly, which would be too much work) * moving irq_enter into assembly, which would be too much work)
*/ */
1: incl PER_CPU_VAR(irq_count) 1: incl PER_CPU_VAR(irq_count)
jne 2f cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
mov PER_CPU_VAR(irq_stack_ptr),%rsp
CFI_DEF_CFA_REGISTER rsi CFI_DEF_CFA_REGISTER rsi
2: /* Store previous stack value */ /* Store previous stack value */
pushq %rsi pushq %rsi
CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \ CFI_ESCAPE 0x0f /* DW_CFA_def_cfa_expression */, 6, \
0x77 /* DW_OP_breg7 */, 0, \ 0x77 /* DW_OP_breg7 */, 0, \
@ -1530,6 +1529,7 @@ ENTRY(nmi)
/* Use %rdx as our temp variable throughout */ /* Use %rdx as our temp variable throughout */
pushq_cfi %rdx pushq_cfi %rdx
CFI_REL_OFFSET rdx, 0
/* /*
* If %cs was not the kernel segment, then the NMI triggered in user * If %cs was not the kernel segment, then the NMI triggered in user
@ -1554,6 +1554,7 @@ ENTRY(nmi)
*/ */
lea 6*8(%rsp), %rdx lea 6*8(%rsp), %rdx
test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi test_in_nmi rdx, 4*8(%rsp), nested_nmi, first_nmi
CFI_REMEMBER_STATE
nested_nmi: nested_nmi:
/* /*
@ -1585,10 +1586,12 @@ nested_nmi:
nested_nmi_out: nested_nmi_out:
popq_cfi %rdx popq_cfi %rdx
CFI_RESTORE rdx
/* No need to check faults here */ /* No need to check faults here */
INTERRUPT_RETURN INTERRUPT_RETURN
CFI_RESTORE_STATE
first_nmi: first_nmi:
/* /*
* Because nested NMIs will use the pushed location that we * Because nested NMIs will use the pushed location that we
@ -1620,10 +1623,15 @@ first_nmi:
* | pt_regs | * | pt_regs |
* +-------------------------+ * +-------------------------+
* *
* The saved RIP is used to fix up the copied RIP that a nested * The saved stack frame is used to fix up the copied stack frame
* NMI may zero out. The original stack frame and the temp storage * that a nested NMI may change to make the interrupted NMI iret jump
* to the repeat_nmi. The original stack frame and the temp storage
* is also used by nested NMIs and can not be trusted on exit. * is also used by nested NMIs and can not be trusted on exit.
*/ */
/* Do not pop rdx, nested NMIs will corrupt that part of the stack */
movq (%rsp), %rdx
CFI_RESTORE rdx
/* Set the NMI executing variable on the stack. */ /* Set the NMI executing variable on the stack. */
pushq_cfi $1 pushq_cfi $1
@ -1631,22 +1639,39 @@ first_nmi:
.rept 5 .rept 5
pushq_cfi 6*8(%rsp) pushq_cfi 6*8(%rsp)
.endr .endr
CFI_DEF_CFA_OFFSET SS+8-RIP
/* Everything up to here is safe from nested NMIs */
/*
* If there was a nested NMI, the first NMI's iret will return
* here. But NMIs are still enabled and we can take another
* nested NMI. The nested NMI checks the interrupted RIP to see
* if it is between repeat_nmi and end_repeat_nmi, and if so
* it will just return, as we are about to repeat an NMI anyway.
* This makes it safe to copy to the stack frame that a nested
* NMI will update.
*/
repeat_nmi:
/*
* Update the stack variable to say we are still in NMI (the update
* is benign for the non-repeat case, where 1 was pushed just above
* to this very stack slot).
*/
movq $1, 5*8(%rsp)
/* Make another copy, this one may be modified by nested NMIs */ /* Make another copy, this one may be modified by nested NMIs */
.rept 5 .rept 5
pushq_cfi 4*8(%rsp) pushq_cfi 4*8(%rsp)
.endr .endr
CFI_DEF_CFA_OFFSET SS+8-RIP
/* Do not pop rdx, nested NMIs will corrupt it */ end_repeat_nmi:
movq 11*8(%rsp), %rdx
/* /*
* Everything below this point can be preempted by a nested * Everything below this point can be preempted by a nested
* NMI if the first NMI took an exception. Repeated NMIs * NMI if the first NMI took an exception and reset our iret stack
* caused by an exception and nested NMI will start here, and * so that we repeat another NMI.
* can still be preempted by another NMI.
*/ */
restart_nmi:
pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */ pushq_cfi $-1 /* ORIG_RAX: no syscall to restart */
subq $ORIG_RAX-R15, %rsp subq $ORIG_RAX-R15, %rsp
CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15 CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
@ -1675,26 +1700,6 @@ nmi_restore:
CFI_ENDPROC CFI_ENDPROC
END(nmi) END(nmi)
/*
* If an NMI hit an iret because of an exception or breakpoint,
* it can lose its NMI context, and a nested NMI may come in.
* In that case, the nested NMI will change the preempted NMI's
* stack to jump to here when it does the final iret.
*/
repeat_nmi:
INTR_FRAME
/* Update the stack variable to say we are still in NMI */
movq $1, 5*8(%rsp)
/* copy the saved stack back to copy stack */
.rept 5
pushq_cfi 4*8(%rsp)
.endr
jmp restart_nmi
CFI_ENDPROC
end_repeat_nmi:
ENTRY(ignore_sysret) ENTRY(ignore_sysret)
CFI_STARTPROC CFI_STARTPROC
mov $-ENOSYS,%eax mov $-ENOSYS,%eax

View File

@ -12,6 +12,7 @@
#include <linux/pci.h> #include <linux/pci.h>
#include <linux/export.h> #include <linux/export.h>
#include <asm/probe_roms.h>
#include <asm/pci-direct.h> #include <asm/pci-direct.h>
#include <asm/e820.h> #include <asm/e820.h>
#include <asm/mmzone.h> #include <asm/mmzone.h>

View File

@ -20,14 +20,12 @@ ENDPROC(copy_page_c)
ENTRY(copy_page) ENTRY(copy_page)
CFI_STARTPROC CFI_STARTPROC
subq $3*8,%rsp subq $2*8,%rsp
CFI_ADJUST_CFA_OFFSET 3*8 CFI_ADJUST_CFA_OFFSET 2*8
movq %rbx,(%rsp) movq %rbx,(%rsp)
CFI_REL_OFFSET rbx, 0 CFI_REL_OFFSET rbx, 0
movq %r12,1*8(%rsp) movq %r12,1*8(%rsp)
CFI_REL_OFFSET r12, 1*8 CFI_REL_OFFSET r12, 1*8
movq %r13,2*8(%rsp)
CFI_REL_OFFSET r13, 2*8
movl $(4096/64)-5,%ecx movl $(4096/64)-5,%ecx
.p2align 4 .p2align 4
@ -91,10 +89,8 @@ ENTRY(copy_page)
CFI_RESTORE rbx CFI_RESTORE rbx
movq 1*8(%rsp),%r12 movq 1*8(%rsp),%r12
CFI_RESTORE r12 CFI_RESTORE r12
movq 2*8(%rsp),%r13 addq $2*8,%rsp
CFI_RESTORE r13 CFI_ADJUST_CFA_OFFSET -2*8
addq $3*8,%rsp
CFI_ADJUST_CFA_OFFSET -3*8
ret ret
.Lcopy_page_end: .Lcopy_page_end:
CFI_ENDPROC CFI_ENDPROC

View File

@ -27,9 +27,8 @@
.section .altinstr_replacement, "ax", @progbits .section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c: .Lmemcpy_c:
movq %rdi, %rax movq %rdi, %rax
movq %rdx, %rcx
movl %edx, %ecx shrq $3, %rcx
shrl $3, %ecx
andl $7, %edx andl $7, %edx
rep movsq rep movsq
movl %edx, %ecx movl %edx, %ecx
@ -48,8 +47,7 @@
.section .altinstr_replacement, "ax", @progbits .section .altinstr_replacement, "ax", @progbits
.Lmemcpy_c_e: .Lmemcpy_c_e:
movq %rdi, %rax movq %rdi, %rax
movq %rdx, %rcx
movl %edx, %ecx
rep movsb rep movsb
ret ret
.Lmemcpy_e_e: .Lmemcpy_e_e:
@ -60,10 +58,7 @@ ENTRY(memcpy)
CFI_STARTPROC CFI_STARTPROC
movq %rdi, %rax movq %rdi, %rax
/* cmpq $0x20, %rdx
* Use 32bit CMP here to avoid long NOP padding.
*/
cmp $0x20, %edx
jb .Lhandle_tail jb .Lhandle_tail
/* /*
@ -72,7 +67,7 @@ ENTRY(memcpy)
*/ */
cmp %dil, %sil cmp %dil, %sil
jl .Lcopy_backward jl .Lcopy_backward
subl $0x20, %edx subq $0x20, %rdx
.Lcopy_forward_loop: .Lcopy_forward_loop:
subq $0x20, %rdx subq $0x20, %rdx
@ -91,7 +86,7 @@ ENTRY(memcpy)
movq %r11, 3*8(%rdi) movq %r11, 3*8(%rdi)
leaq 4*8(%rdi), %rdi leaq 4*8(%rdi), %rdi
jae .Lcopy_forward_loop jae .Lcopy_forward_loop
addq $0x20, %rdx addl $0x20, %edx
jmp .Lhandle_tail jmp .Lhandle_tail
.Lcopy_backward: .Lcopy_backward:
@ -123,11 +118,11 @@ ENTRY(memcpy)
/* /*
* Calculate copy position to head. * Calculate copy position to head.
*/ */
addq $0x20, %rdx addl $0x20, %edx
subq %rdx, %rsi subq %rdx, %rsi
subq %rdx, %rdi subq %rdx, %rdi
.Lhandle_tail: .Lhandle_tail:
cmpq $16, %rdx cmpl $16, %edx
jb .Lless_16bytes jb .Lless_16bytes
/* /*
@ -144,7 +139,7 @@ ENTRY(memcpy)
retq retq
.p2align 4 .p2align 4
.Lless_16bytes: .Lless_16bytes:
cmpq $8, %rdx cmpl $8, %edx
jb .Lless_8bytes jb .Lless_8bytes
/* /*
* Move data from 8 bytes to 15 bytes. * Move data from 8 bytes to 15 bytes.
@ -156,7 +151,7 @@ ENTRY(memcpy)
retq retq
.p2align 4 .p2align 4
.Lless_8bytes: .Lless_8bytes:
cmpq $4, %rdx cmpl $4, %edx
jb .Lless_3bytes jb .Lless_3bytes
/* /*
@ -169,18 +164,19 @@ ENTRY(memcpy)
retq retq
.p2align 4 .p2align 4
.Lless_3bytes: .Lless_3bytes:
cmpl $0, %edx subl $1, %edx
je .Lend jb .Lend
/* /*
* Move data from 1 byte to 3 bytes. * Move data from 1 byte to 3 bytes.
*/ */
.Lloop_1: movzbl (%rsi), %ecx
movb (%rsi), %r8b jz .Lstore_1byte
movb %r8b, (%rdi) movzbq 1(%rsi), %r8
incq %rdi movzbq (%rsi, %rdx), %r9
incq %rsi movb %r8b, 1(%rdi)
decl %edx movb %r9b, (%rdi, %rdx)
jnz .Lloop_1 .Lstore_1byte:
movb %cl, (%rdi)
.Lend: .Lend:
retq retq

View File

@ -19,16 +19,15 @@
.section .altinstr_replacement, "ax", @progbits .section .altinstr_replacement, "ax", @progbits
.Lmemset_c: .Lmemset_c:
movq %rdi,%r9 movq %rdi,%r9
movl %edx,%r8d movq %rdx,%rcx
andl $7,%r8d andl $7,%edx
movl %edx,%ecx shrq $3,%rcx
shrl $3,%ecx
/* expand byte value */ /* expand byte value */
movzbl %sil,%esi movzbl %sil,%esi
movabs $0x0101010101010101,%rax movabs $0x0101010101010101,%rax
mulq %rsi /* with rax, clobbers rdx */ imulq %rsi,%rax
rep stosq rep stosq
movl %r8d,%ecx movl %edx,%ecx
rep stosb rep stosb
movq %r9,%rax movq %r9,%rax
ret ret
@ -50,7 +49,7 @@
.Lmemset_c_e: .Lmemset_c_e:
movq %rdi,%r9 movq %rdi,%r9
movb %sil,%al movb %sil,%al
movl %edx,%ecx movq %rdx,%rcx
rep stosb rep stosb
movq %r9,%rax movq %r9,%rax
ret ret
@ -61,12 +60,11 @@ ENTRY(memset)
ENTRY(__memset) ENTRY(__memset)
CFI_STARTPROC CFI_STARTPROC
movq %rdi,%r10 movq %rdi,%r10
movq %rdx,%r11
/* expand byte value */ /* expand byte value */
movzbl %sil,%ecx movzbl %sil,%ecx
movabs $0x0101010101010101,%rax movabs $0x0101010101010101,%rax
mul %rcx /* with rax, clobbers rdx */ imulq %rcx,%rax
/* align dst */ /* align dst */
movl %edi,%r9d movl %edi,%r9d
@ -75,13 +73,13 @@ ENTRY(__memset)
CFI_REMEMBER_STATE CFI_REMEMBER_STATE
.Lafter_bad_alignment: .Lafter_bad_alignment:
movl %r11d,%ecx movq %rdx,%rcx
shrl $6,%ecx shrq $6,%rcx
jz .Lhandle_tail jz .Lhandle_tail
.p2align 4 .p2align 4
.Lloop_64: .Lloop_64:
decl %ecx decq %rcx
movq %rax,(%rdi) movq %rax,(%rdi)
movq %rax,8(%rdi) movq %rax,8(%rdi)
movq %rax,16(%rdi) movq %rax,16(%rdi)
@ -97,7 +95,7 @@ ENTRY(__memset)
to predict jump tables. */ to predict jump tables. */
.p2align 4 .p2align 4
.Lhandle_tail: .Lhandle_tail:
movl %r11d,%ecx movl %edx,%ecx
andl $63&(~7),%ecx andl $63&(~7),%ecx
jz .Lhandle_7 jz .Lhandle_7
shrl $3,%ecx shrl $3,%ecx
@ -109,12 +107,11 @@ ENTRY(__memset)
jnz .Lloop_8 jnz .Lloop_8
.Lhandle_7: .Lhandle_7:
movl %r11d,%ecx andl $7,%edx
andl $7,%ecx
jz .Lende jz .Lende
.p2align 4 .p2align 4
.Lloop_1: .Lloop_1:
decl %ecx decl %edx
movb %al,(%rdi) movb %al,(%rdi)
leaq 1(%rdi),%rdi leaq 1(%rdi),%rdi
jnz .Lloop_1 jnz .Lloop_1
@ -125,13 +122,13 @@ ENTRY(__memset)
CFI_RESTORE_STATE CFI_RESTORE_STATE
.Lbad_alignment: .Lbad_alignment:
cmpq $7,%r11 cmpq $7,%rdx
jbe .Lhandle_7 jbe .Lhandle_7
movq %rax,(%rdi) /* unaligned store */ movq %rax,(%rdi) /* unaligned store */
movq $8,%r8 movq $8,%r8
subq %r9,%r8 subq %r9,%r8
addq %r8,%rdi addq %r8,%rdi
subq %r8,%r11 subq %r8,%rdx
jmp .Lafter_bad_alignment jmp .Lafter_bad_alignment
.Lfinal: .Lfinal:
CFI_ENDPROC CFI_ENDPROC

View File

@ -4,21 +4,58 @@
#ifndef __ASSEMBLY__ #ifndef __ASSEMBLY__
#include <linux/compiler.h> #include <linux/compiler.h>
#include <linux/log2.h>
/* Pure 2^n version of get_order */ /*
static inline __attribute_const__ int get_order(unsigned long size) * Runtime evaluation of get_order()
*/
static inline __attribute_const__
int __get_order(unsigned long size)
{ {
int order; int order;
size = (size - 1) >> (PAGE_SHIFT - 1); size--;
order = -1; size >>= PAGE_SHIFT;
do { #if BITS_PER_LONG == 32
size >>= 1; order = fls(size);
order++; #else
} while (size); order = fls64(size);
#endif
return order; return order;
} }
/**
* get_order - Determine the allocation order of a memory size
* @size: The size for which to get the order
*
* Determine the allocation order of a particular sized block of memory. This
* is on a logarithmic scale, where:
*
* 0 -> 2^0 * PAGE_SIZE and below
* 1 -> 2^1 * PAGE_SIZE to 2^0 * PAGE_SIZE + 1
* 2 -> 2^2 * PAGE_SIZE to 2^1 * PAGE_SIZE + 1
* 3 -> 2^3 * PAGE_SIZE to 2^2 * PAGE_SIZE + 1
* 4 -> 2^4 * PAGE_SIZE to 2^3 * PAGE_SIZE + 1
* ...
*
* The order returned is used to find the smallest allocation granule required
* to hold an object of the specified size.
*
* The result is undefined if the size is 0.
*
* This function may be used to initialise variables with compile time
* evaluations of constants.
*/
#define get_order(n) \
( \
__builtin_constant_p(n) ? ( \
((n) == 0UL) ? BITS_PER_LONG - PAGE_SHIFT : \
(((n) < (1UL << PAGE_SHIFT)) ? 0 : \
ilog2((n) - 1) - PAGE_SHIFT + 1) \
) : \
__get_order(n) \
)
#endif /* __ASSEMBLY__ */ #endif /* __ASSEMBLY__ */
#endif /* __ASM_GENERIC_GETORDER_H */ #endif /* __ASM_GENERIC_GETORDER_H */