dect
/
linux-2.6
Archived
13
0
Fork 0

Merge branch 'x86/xsave' into x86/core

This commit is contained in:
Ingo Molnar 2008-09-05 09:18:39 +02:00
commit accf0fa697
21 changed files with 899 additions and 184 deletions

View File

@ -179,9 +179,10 @@ struct sigframe
u32 pretcode; u32 pretcode;
int sig; int sig;
struct sigcontext_ia32 sc; struct sigcontext_ia32 sc;
struct _fpstate_ia32 fpstate; struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
unsigned int extramask[_COMPAT_NSIG_WORDS-1]; unsigned int extramask[_COMPAT_NSIG_WORDS-1];
char retcode[8]; char retcode[8];
/* fp state follows here */
}; };
struct rt_sigframe struct rt_sigframe
@ -192,8 +193,8 @@ struct rt_sigframe
u32 puc; u32 puc;
compat_siginfo_t info; compat_siginfo_t info;
struct ucontext_ia32 uc; struct ucontext_ia32 uc;
struct _fpstate_ia32 fpstate;
char retcode[8]; char retcode[8];
/* fp state follows here */
}; };
#define COPY(x) { \ #define COPY(x) { \
@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
unsigned int *peax) unsigned int *peax)
{ {
unsigned int tmpflags, gs, oldgs, err = 0; unsigned int tmpflags, gs, oldgs, err = 0;
struct _fpstate_ia32 __user *buf; void __user *buf;
u32 tmp; u32 tmp;
/* Always make any pending restarted system calls return -EINTR */ /* Always make any pending restarted system calls return -EINTR */
@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
err |= __get_user(tmp, &sc->fpstate); err |= __get_user(tmp, &sc->fpstate);
buf = compat_ptr(tmp); buf = compat_ptr(tmp);
if (buf) { err |= restore_i387_xstate_ia32(buf);
if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387_ia32(buf);
} else {
struct task_struct *me = current;
if (used_math()) {
clear_fpu(me);
clear_used_math();
}
}
err |= __get_user(tmp, &sc->ax); err |= __get_user(tmp, &sc->ax);
*peax = tmp; *peax = tmp;
return err; return err;
badframe:
return 1;
} }
asmlinkage long sys32_sigreturn(struct pt_regs *regs) asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@ -350,7 +337,7 @@ badframe:
*/ */
static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
struct _fpstate_ia32 __user *fpstate, void __user *fpstate,
struct pt_regs *regs, unsigned int mask) struct pt_regs *regs, unsigned int mask)
{ {
int tmp, err = 0; int tmp, err = 0;
@ -381,7 +368,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
err |= __put_user((u32)regs->flags, &sc->flags); err |= __put_user((u32)regs->flags, &sc->flags);
err |= __put_user((u32)regs->sp, &sc->sp_at_signal); err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
tmp = save_i387_ia32(fpstate); tmp = save_i387_xstate_ia32(fpstate);
if (tmp < 0) if (tmp < 0)
err = -EFAULT; err = -EFAULT;
else { else {
@ -402,7 +389,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
* Determine which stack to use.. * Determine which stack to use..
*/ */
static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
size_t frame_size) size_t frame_size,
void **fpstate)
{ {
unsigned long sp; unsigned long sp;
@ -421,6 +409,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
ka->sa.sa_restorer) ka->sa.sa_restorer)
sp = (unsigned long) ka->sa.sa_restorer; sp = (unsigned long) ka->sa.sa_restorer;
if (used_math()) {
sp = sp - sig_xstate_ia32_size;
*fpstate = (struct _fpstate_ia32 *) sp;
}
sp -= frame_size; sp -= frame_size;
/* Align the stack pointer according to the i386 ABI, /* Align the stack pointer according to the i386 ABI,
* i.e. so that on function entry ((sp + 4) & 15) == 0. */ * i.e. so that on function entry ((sp + 4) & 15) == 0. */
@ -434,6 +427,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
struct sigframe __user *frame; struct sigframe __user *frame;
void __user *restorer; void __user *restorer;
int err = 0; int err = 0;
void __user *fpstate = NULL;
/* copy_to_user optimizes that into a single 8 byte store */ /* copy_to_user optimizes that into a single 8 byte store */
static const struct { static const struct {
@ -448,7 +442,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
0, 0,
}; };
frame = get_sigframe(ka, regs, sizeof(*frame)); frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv; goto give_sigsegv;
@ -457,8 +451,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
if (err) if (err)
goto give_sigsegv; goto give_sigsegv;
err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
set->sig[0]);
if (err) if (err)
goto give_sigsegv; goto give_sigsegv;
@ -522,6 +515,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
struct rt_sigframe __user *frame; struct rt_sigframe __user *frame;
void __user *restorer; void __user *restorer;
int err = 0; int err = 0;
void __user *fpstate = NULL;
/* __copy_to_user optimizes that into a single 8 byte store */ /* __copy_to_user optimizes that into a single 8 byte store */
static const struct { static const struct {
@ -537,7 +531,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
0, 0,
}; };
frame = get_sigframe(ka, regs, sizeof(*frame)); frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv; goto give_sigsegv;
@ -550,13 +544,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv; goto give_sigsegv;
/* Create the ucontext. */ /* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags); if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp), err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags); &frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]); regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err) if (err)

View File

@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
obj-y += process.o obj-y += process.o
obj-y += i387.o obj-y += i387.o xsave.o
obj-y += ptrace.o obj-y += ptrace.o
obj-y += ds.o obj-y += ds.o
obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_X86_32) += tls.o

View File

@ -739,9 +739,20 @@ void __cpuinit cpu_init(void)
/* /*
* Force FPU initialization: * Force FPU initialization:
*/ */
current_thread_info()->status = 0; if (cpu_has_xsave)
current_thread_info()->status = TS_XSAVE;
else
current_thread_info()->status = 0;
clear_used_math(); clear_used_math();
mxcsr_feature_mask_init(); mxcsr_feature_mask_init();
/*
* Boot processor to setup the FP and extended state context info.
*/
if (!smp_processor_id())
init_thread_xstate();
xsave_init();
} }
#ifdef CONFIG_HOTPLUG_CPU #ifdef CONFIG_HOTPLUG_CPU

View File

@ -21,9 +21,12 @@
# include <asm/sigcontext32.h> # include <asm/sigcontext32.h>
# include <asm/user32.h> # include <asm/user32.h>
#else #else
# define save_i387_ia32 save_i387 # define save_i387_xstate_ia32 save_i387_xstate
# define restore_i387_ia32 restore_i387 # define restore_i387_xstate_ia32 restore_i387_xstate
# define _fpstate_ia32 _fpstate # define _fpstate_ia32 _fpstate
# define _xstate_ia32 _xstate
# define sig_xstate_ia32_size sig_xstate_size
# define fx_sw_reserved_ia32 fx_sw_reserved
# define user_i387_ia32_struct user_i387_struct # define user_i387_ia32_struct user_i387_struct
# define user32_fxsr_struct user_fxsr_struct # define user32_fxsr_struct user_fxsr_struct
#endif #endif
@ -36,6 +39,7 @@
static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
unsigned int xstate_size; unsigned int xstate_size;
unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
static struct i387_fxsave_struct fx_scratch __cpuinitdata; static struct i387_fxsave_struct fx_scratch __cpuinitdata;
void __cpuinit mxcsr_feature_mask_init(void) void __cpuinit mxcsr_feature_mask_init(void)
@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
return; return;
} }
if (cpu_has_xsave) {
xsave_cntxt_init();
return;
}
if (cpu_has_fxsr) if (cpu_has_fxsr)
xstate_size = sizeof(struct i387_fxsave_struct); xstate_size = sizeof(struct i387_fxsave_struct);
#ifdef CONFIG_X86_32 #ifdef CONFIG_X86_32
@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
/*
* Boot processor to setup the FP and extended state context info.
*/
if (!smp_processor_id())
init_thread_xstate();
xsave_init();
mxcsr_feature_mask_init(); mxcsr_feature_mask_init();
/* clean state in init */ /* clean state in init */
current_thread_info()->status = 0; if (cpu_has_xsave)
current_thread_info()->status = TS_XSAVE;
else
current_thread_info()->status = 0;
clear_used_math(); clear_used_math();
} }
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
*/ */
target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
/*
* update the header bits in the xsave header, indicating the
* presence of FP and SSE state.
*/
if (cpu_has_xsave)
target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
return ret; return ret;
} }
@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
if (!ret) if (!ret)
convert_to_fxsr(target, &env); convert_to_fxsr(target, &env);
/*
* update the header bit in the xsave header, indicating the
* presence of FP.
*/
if (cpu_has_xsave)
target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
return ret; return ret;
} }
@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
unlazy_fpu(tsk);
fp->status = fp->swd; fp->status = fp->swd;
if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
return -1; return -1;
@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
struct user_i387_ia32_struct env; struct user_i387_ia32_struct env;
int err = 0; int err = 0;
unlazy_fpu(tsk);
convert_from_fxsr(&env, tsk); convert_from_fxsr(&env, tsk);
if (__copy_to_user(buf, &env, sizeof(env))) if (__copy_to_user(buf, &env, sizeof(env)))
return -1; return -1;
@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
if (err) if (err)
return -1; return -1;
if (__copy_to_user(&buf->_fxsr_env[0], fx, if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
sizeof(struct i387_fxsave_struct)))
return -1; return -1;
return 1; return 1;
} }
int save_i387_ia32(struct _fpstate_ia32 __user *buf) static int save_i387_xsave(void __user *buf)
{ {
struct _fpstate_ia32 __user *fx = buf;
int err = 0;
if (save_i387_fxsave(fx) < 0)
return -1;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
sizeof(struct _fpx_sw_bytes));
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_ia32_size
- FP_XSTATE_MAGIC2_SIZE));
if (err)
return -1;
return 1;
}
int save_i387_xstate_ia32(void __user *buf)
{
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
struct task_struct *tsk = current;
if (!used_math()) if (!used_math())
return 0; return 0;
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
return -EACCES;
/* /*
* This will cause a "finit" to be triggered by the next * This will cause a "finit" to be triggered by the next
* attempted FPU operation by the 'current' process. * attempted FPU operation by the 'current' process.
@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
if (!HAVE_HWFP) { if (!HAVE_HWFP) {
return fpregs_soft_get(current, NULL, return fpregs_soft_get(current, NULL,
0, sizeof(struct user_i387_ia32_struct), 0, sizeof(struct user_i387_ia32_struct),
NULL, buf) ? -1 : 1; NULL, fp) ? -1 : 1;
} }
unlazy_fpu(tsk);
if (cpu_has_xsave)
return save_i387_xsave(fp);
if (cpu_has_fxsr) if (cpu_has_fxsr)
return save_i387_fxsave(buf); return save_i387_fxsave(fp);
else else
return save_i387_fsave(buf); return save_i387_fsave(fp);
} }
static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
sizeof(struct i387_fsave_struct)); sizeof(struct i387_fsave_struct));
} }
static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
unsigned int size)
{ {
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct user_i387_ia32_struct env; struct user_i387_ia32_struct env;
int err; int err;
err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
sizeof(struct i387_fxsave_struct)); size);
/* mxcsr reserved bits must be masked to zero for security reasons */ /* mxcsr reserved bits must be masked to zero for security reasons */
tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
if (err || __copy_from_user(&env, buf, sizeof(env))) if (err || __copy_from_user(&env, buf, sizeof(env)))
@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
return 0; return 0;
} }
int restore_i387_ia32(struct _fpstate_ia32 __user *buf) static int restore_i387_xsave(void __user *buf)
{
struct _fpx_sw_bytes fx_sw_user;
struct _fpstate_ia32 __user *fx_user =
((struct _fpstate_ia32 __user *) buf);
struct i387_fxsave_struct __user *fx =
(struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
struct xsave_hdr_struct *xsave_hdr =
&current->thread.xstate->xsave.xsave_hdr;
u64 mask;
int err;
if (check_for_xstate(fx, buf, &fx_sw_user))
goto fx_only;
mask = fx_sw_user.xstate_bv;
err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
xsave_hdr->xstate_bv &= pcntxt_mask;
/*
* These bits must be zero.
*/
xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
/*
* Init the state that is not present in the memory layout
* and enabled by the OS.
*/
mask = ~(pcntxt_mask & ~mask);
xsave_hdr->xstate_bv &= mask;
return err;
fx_only:
/*
* Couldn't find the extended state information in the memory
* layout. Restore the FP/SSE and init the other extended state
* enabled by the OS.
*/
xsave_hdr->xstate_bv = XSTATE_FPSSE;
return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
}
int restore_i387_xstate_ia32(void __user *buf)
{ {
int err; int err;
struct task_struct *tsk = current; struct task_struct *tsk = current;
struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
if (HAVE_HWFP) if (HAVE_HWFP)
clear_fpu(tsk); clear_fpu(tsk);
if (!buf) {
if (used_math()) {
clear_fpu(tsk);
clear_used_math();
}
return 0;
} else
if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
return -EACCES;
if (!used_math()) { if (!used_math()) {
err = init_fpu(tsk); err = init_fpu(tsk);
if (err) if (err)
@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
} }
if (HAVE_HWFP) { if (HAVE_HWFP) {
if (cpu_has_fxsr) if (cpu_has_xsave)
err = restore_i387_fxsave(buf); err = restore_i387_xsave(buf);
else if (cpu_has_fxsr)
err = restore_i387_fxsave(fp, sizeof(struct
i387_fxsave_struct));
else else
err = restore_i387_fsave(buf); err = restore_i387_fsave(fp);
} else { } else {
err = fpregs_soft_set(current, NULL, err = fpregs_soft_set(current, NULL,
0, sizeof(struct user_i387_ia32_struct), 0, sizeof(struct user_i387_ia32_struct),
NULL, buf) != 0; NULL, fp) != 0;
} }
set_used_math(); set_used_math();

View File

@ -3,9 +3,18 @@ struct sigframe {
char __user *pretcode; char __user *pretcode;
int sig; int sig;
struct sigcontext sc; struct sigcontext sc;
struct _fpstate fpstate; /*
* fpstate is unused. fpstate is moved/allocated after
* retcode[] below. This movement allows to have the FP state and the
* future state extensions (xsave) stay together.
* And at the same time retaining the unused fpstate, prevents changing
* the offset of extramask[] in the sigframe and thus prevent any
* legacy application accessing/modifying it.
*/
struct _fpstate fpstate_unused;
unsigned long extramask[_NSIG_WORDS-1]; unsigned long extramask[_NSIG_WORDS-1];
char retcode[8]; char retcode[8];
/* fp state follows here */
}; };
struct rt_sigframe { struct rt_sigframe {
@ -15,13 +24,14 @@ struct rt_sigframe {
void __user *puc; void __user *puc;
struct siginfo info; struct siginfo info;
struct ucontext uc; struct ucontext uc;
struct _fpstate fpstate;
char retcode[8]; char retcode[8];
/* fp state follows here */
}; };
#else #else
struct rt_sigframe { struct rt_sigframe {
char __user *pretcode; char __user *pretcode;
struct ucontext uc; struct ucontext uc;
struct siginfo info; struct siginfo info;
/* fp state follows here */
}; };
#endif #endif

View File

@ -160,28 +160,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
} }
{ {
struct _fpstate __user *buf; void __user *buf;
err |= __get_user(buf, &sc->fpstate); err |= __get_user(buf, &sc->fpstate);
if (buf) { err |= restore_i387_xstate(buf);
if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387(buf);
} else {
struct task_struct *me = current;
if (used_math()) {
clear_fpu(me);
clear_used_math();
}
}
} }
err |= __get_user(*pax, &sc->ax); err |= __get_user(*pax, &sc->ax);
return err; return err;
badframe:
return 1;
} }
asmlinkage unsigned long sys_sigreturn(unsigned long __unused) asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@ -263,7 +249,7 @@ badframe:
* Set up a signal frame. * Set up a signal frame.
*/ */
static int static int
setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
struct pt_regs *regs, unsigned long mask) struct pt_regs *regs, unsigned long mask)
{ {
int tmp, err = 0; int tmp, err = 0;
@ -290,7 +276,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
err |= __put_user(regs->sp, &sc->sp_at_signal); err |= __put_user(regs->sp, &sc->sp_at_signal);
err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
tmp = save_i387(fpstate); tmp = save_i387_xstate(fpstate);
if (tmp < 0) if (tmp < 0)
err = 1; err = 1;
else else
@ -307,7 +293,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
* Determine which stack to use.. * Determine which stack to use..
*/ */
static inline void __user * static inline void __user *
get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
void **fpstate)
{ {
unsigned long sp; unsigned long sp;
@ -333,6 +320,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
sp = (unsigned long) ka->sa.sa_restorer; sp = (unsigned long) ka->sa.sa_restorer;
} }
if (used_math()) {
sp = sp - sig_xstate_size;
*fpstate = (struct _fpstate *) sp;
}
sp -= frame_size; sp -= frame_size;
/* /*
* Align the stack pointer according to the i386 ABI, * Align the stack pointer according to the i386 ABI,
@ -351,8 +343,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
void __user *restorer; void __user *restorer;
int err = 0; int err = 0;
int usig; int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame)); frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv; goto give_sigsegv;
@ -367,7 +360,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
if (err) if (err)
goto give_sigsegv; goto give_sigsegv;
err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
if (err) if (err)
goto give_sigsegv; goto give_sigsegv;
@ -428,8 +421,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
void __user *restorer; void __user *restorer;
int err = 0; int err = 0;
int usig; int usig;
void __user *fpstate = NULL;
frame = get_sigframe(ka, regs, sizeof(*frame)); frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
goto give_sigsegv; goto give_sigsegv;
@ -448,13 +442,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
goto give_sigsegv; goto give_sigsegv;
/* Create the ucontext. */ /* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags); if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp), err |= __put_user(sas_ss_flags(regs->sp),
&frame->uc.uc_stack.ss_flags); &frame->uc.uc_stack.ss_flags);
err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
regs, set->sig[0]); regs, set->sig[0]);
err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
if (err) if (err)

View File

@ -54,69 +54,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
return do_sigaltstack(uss, uoss, regs->sp); return do_sigaltstack(uss, uoss, regs->sp);
} }
/*
* Signal frame handlers.
*/
static inline int save_i387(struct _fpstate __user *buf)
{
struct task_struct *tsk = current;
int err = 0;
BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
sizeof(tsk->thread.xstate->fxsave));
if ((unsigned long)buf % 16)
printk("save_i387: bad fpstate %p\n", buf);
if (!used_math())
return 0;
clear_used_math(); /* trigger finit */
if (task_thread_info(tsk)->status & TS_USEDFPU) {
err = save_i387_checking((struct i387_fxsave_struct __user *)
buf);
if (err)
return err;
task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
} else {
if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
sizeof(struct i387_fxsave_struct)))
return -1;
}
return 1;
}
/*
* This restores directly out of user space. Exceptions are handled.
*/
static inline int restore_i387(struct _fpstate __user *buf)
{
struct task_struct *tsk = current;
int err;
if (!used_math()) {
err = init_fpu(tsk);
if (err)
return err;
}
if (!(task_thread_info(current)->status & TS_USEDFPU)) {
clts();
task_thread_info(current)->status |= TS_USEDFPU;
}
err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
if (unlikely(err)) {
/*
* Encountered an error while doing the restore from the
* user buffer, clear the fpu state.
*/
clear_fpu(tsk);
clear_used_math();
}
return err;
}
/* /*
* Do a signal return; undo the signal stack. * Do a signal return; undo the signal stack.
*/ */
@ -161,25 +98,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
{ {
struct _fpstate __user * buf; struct _fpstate __user * buf;
err |= __get_user(buf, &sc->fpstate); err |= __get_user(buf, &sc->fpstate);
err |= restore_i387_xstate(buf);
if (buf) {
if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
goto badframe;
err |= restore_i387(buf);
} else {
struct task_struct *me = current;
if (used_math()) {
clear_fpu(me);
clear_used_math();
}
}
} }
err |= __get_user(*pax, &sc->ax); err |= __get_user(*pax, &sc->ax);
return err; return err;
badframe:
return 1;
} }
asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@ -270,26 +193,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
sp = current->sas_ss_sp + current->sas_ss_size; sp = current->sas_ss_sp + current->sas_ss_size;
} }
return (void __user *)round_down(sp - size, 16); return (void __user *)round_down(sp - size, 64);
} }
static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
sigset_t *set, struct pt_regs * regs) sigset_t *set, struct pt_regs * regs)
{ {
struct rt_sigframe __user *frame; struct rt_sigframe __user *frame;
struct _fpstate __user *fp = NULL; void __user *fp = NULL;
int err = 0; int err = 0;
struct task_struct *me = current; struct task_struct *me = current;
if (used_math()) { if (used_math()) {
fp = get_stack(ka, regs, sizeof(struct _fpstate)); fp = get_stack(ka, regs, sig_xstate_size);
frame = (void __user *)round_down( frame = (void __user *)round_down(
(unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) if (save_i387_xstate(fp) < 0)
goto give_sigsegv;
if (save_i387(fp) < 0)
err |= -1; err |= -1;
} else } else
frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@ -304,7 +224,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
} }
/* Create the ucontext. */ /* Create the ucontext. */
err |= __put_user(0, &frame->uc.uc_flags); if (cpu_has_xsave)
err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
else
err |= __put_user(0, &frame->uc.uc_flags);
err |= __put_user(0, &frame->uc.uc_link); err |= __put_user(0, &frame->uc.uc_link);
err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
err |= __put_user(sas_ss_flags(regs->sp), err |= __put_user(sas_ss_flags(regs->sp),

View File

@ -1228,7 +1228,6 @@ void __init trap_init(void)
set_bit(SYSCALL_VECTOR, used_vectors); set_bit(SYSCALL_VECTOR, used_vectors);
init_thread_xstate();
/* /*
* Should be a barrier for any external CPU state: * Should be a barrier for any external CPU state:
*/ */

View File

@ -1134,7 +1134,7 @@ asmlinkage void math_state_restore(void)
/* /*
* Paranoid restore. send a SIGSEGV if we fail to restore the state. * Paranoid restore. send a SIGSEGV if we fail to restore the state.
*/ */
if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { if (unlikely(restore_fpu_checking(me))) {
stts(); stts();
force_sig(SIGSEGV, me); force_sig(SIGSEGV, me);
return; return;
@ -1172,10 +1172,6 @@ void __init trap_init(void)
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
#endif #endif
/*
* initialize the per thread extended state:
*/
init_thread_xstate();
/* /*
* Should be a barrier for any external CPU state: * Should be a barrier for any external CPU state:
*/ */

316
arch/x86/kernel/xsave.c Normal file
View File

@ -0,0 +1,316 @@
/*
* xsave/xrstor support.
*
* Author: Suresh Siddha <suresh.b.siddha@intel.com>
*/
#include <linux/bootmem.h>
#include <linux/compat.h>
#include <asm/i387.h>
#ifdef CONFIG_IA32_EMULATION
#include <asm/sigcontext32.h>
#endif
#include <asm/xcr.h>
/*
* Supported feature mask by the CPU and the kernel.
*/
u64 pcntxt_mask;
struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION
struct _fpx_sw_bytes fx_sw_reserved_ia32;
#endif
/*
* Check for the presence of extended state information in the
* user fpstate pointer in the sigcontext.
*/
int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *fx_sw_user)
{
int min_xstate_size = sizeof(struct i387_fxsave_struct) +
sizeof(struct xsave_hdr_struct);
unsigned int magic2;
int err;
err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
sizeof(struct _fpx_sw_bytes));
if (err)
return err;
/*
* First Magic check failed.
*/
if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
return -1;
/*
* Check for error scenarios.
*/
if (fx_sw_user->xstate_size < min_xstate_size ||
fx_sw_user->xstate_size > xstate_size ||
fx_sw_user->xstate_size > fx_sw_user->extended_size)
return -1;
err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
fx_sw_user->extended_size -
FP_XSTATE_MAGIC2_SIZE));
/*
* Check for the presence of second magic word at the end of memory
* layout. This detects the case where the user just copied the legacy
* fpstate layout with out copying the extended state information
* in the memory layout.
*/
if (err || magic2 != FP_XSTATE_MAGIC2)
return -1;
return 0;
}
#ifdef CONFIG_X86_64
/*
* Signal frame handlers.
*/
int save_i387_xstate(void __user *buf)
{
struct task_struct *tsk = current;
int err = 0;
if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
return -EACCES;
BUG_ON(sig_xstate_size < xstate_size);
if ((unsigned long)buf % 64)
printk("save_i387_xstate: bad fpstate %p\n", buf);
if (!used_math())
return 0;
clear_used_math(); /* trigger finit */
if (task_thread_info(tsk)->status & TS_USEDFPU) {
/*
* Start with clearing the user buffer. This will present a
* clean context for the bytes not touched by the fxsave/xsave.
*/
__clear_user(buf, sig_xstate_size);
if (task_thread_info(tsk)->status & TS_XSAVE)
err = xsave_user(buf);
else
err = fxsave_user(buf);
if (err)
return err;
task_thread_info(tsk)->status &= ~TS_USEDFPU;
stts();
} else {
if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
xstate_size))
return -1;
}
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct _fpstate __user *fx = buf;
err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
err |= __put_user(FP_XSTATE_MAGIC2,
(__u32 __user *) (buf + sig_xstate_size
- FP_XSTATE_MAGIC2_SIZE));
}
return 1;
}
/*
* Restore the extended state if present. Otherwise, restore the FP/SSE
* state.
*/
int restore_user_xstate(void __user *buf)
{
struct _fpx_sw_bytes fx_sw_user;
u64 mask;
int err;
if (((unsigned long)buf % 64) ||
check_for_xstate(buf, buf, &fx_sw_user))
goto fx_only;
mask = fx_sw_user.xstate_bv;
/*
* restore the state passed by the user.
*/
err = xrestore_user(buf, mask);
if (err)
return err;
/*
* init the state skipped by the user.
*/
mask = pcntxt_mask & ~mask;
xrstor_state(init_xstate_buf, mask);
return 0;
fx_only:
/*
* couldn't find the extended state information in the
* memory layout. Restore just the FP/SSE and init all
* the other extended state.
*/
xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
}
/*
* This restores directly out of user space. Exceptions are handled.
*/
int restore_i387_xstate(void __user *buf)
{
struct task_struct *tsk = current;
int err = 0;
if (!buf) {
if (used_math())
goto clear;
return 0;
} else
if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
return -EACCES;
if (!used_math()) {
err = init_fpu(tsk);
if (err)
return err;
}
if (!(task_thread_info(current)->status & TS_USEDFPU)) {
clts();
task_thread_info(current)->status |= TS_USEDFPU;
}
if (task_thread_info(tsk)->status & TS_XSAVE)
err = restore_user_xstate(buf);
else
err = fxrstor_checking((__force struct i387_fxsave_struct *)
buf);
if (unlikely(err)) {
/*
* Encountered an error while doing the restore from the
* user buffer, clear the fpu state.
*/
clear:
clear_fpu(tsk);
clear_used_math();
}
return err;
}
#endif
/*
* Prepare the SW reserved portion of the fxsave memory layout, indicating
* the presence of the extended state information in the memory layout
* pointed by the fpstate pointer in the sigcontext.
* This will be saved when ever the FP and extended state context is
* saved on the user stack during the signal handler delivery to the user.
*/
void prepare_fx_sw_frame(void)
{
int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
FP_XSTATE_MAGIC2_SIZE;
sig_xstate_size = sizeof(struct _fpstate) + size_extended;
#ifdef CONFIG_IA32_EMULATION
sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
#endif
memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
fx_sw_reserved.extended_size = sig_xstate_size;
fx_sw_reserved.xstate_bv = pcntxt_mask;
fx_sw_reserved.xstate_size = xstate_size;
#ifdef CONFIG_IA32_EMULATION
memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
sizeof(struct _fpx_sw_bytes));
fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
#endif
}
/*
* Represents init state for the supported extended state.
*/
struct xsave_struct *init_xstate_buf;
#ifdef CONFIG_X86_64
unsigned int sig_xstate_size = sizeof(struct _fpstate);
#endif
/*
* Enable the extended processor state save/restore feature
*/
void __cpuinit xsave_init(void)
{
if (!cpu_has_xsave)
return;
set_in_cr4(X86_CR4_OSXSAVE);
/*
* Enable all the features that the HW is capable of
* and the Linux kernel is aware of.
*/
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
}
/*
* setup the xstate image representing the init state
*/
void setup_xstate_init(void)
{
init_xstate_buf = alloc_bootmem(xstate_size);
init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
}
/*
* Enable and initialize the xsave feature.
*/
void __init xsave_cntxt_init(void)
{
unsigned int eax, ebx, ecx, edx;
cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
pcntxt_mask = eax + ((u64)edx << 32);
if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
pcntxt_mask);
BUG();
}
/*
* for now OS knows only about FP/SSE
*/
pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
xsave_init();
/*
* Recompute the context size for enabled features
*/
cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
xstate_size = ebx;
prepare_fx_sw_frame();
setup_xstate_init();
printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
"cntxt size 0x%x\n",
pcntxt_mask, xstate_size);
}

View File

@ -11,6 +11,7 @@
#include <linux/suspend.h> #include <linux/suspend.h>
#include <asm/mtrr.h> #include <asm/mtrr.h>
#include <asm/mce.h> #include <asm/mce.h>
#include <asm/xcr.h>
static struct saved_context saved_context; static struct saved_context saved_context;
@ -126,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
if (boot_cpu_has(X86_FEATURE_SEP)) if (boot_cpu_has(X86_FEATURE_SEP))
enable_sep_cpu(); enable_sep_cpu();
/*
* restore XCR0 for xsave capable cpu's.
*/
if (cpu_has_xsave)
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
fix_processor_context(); fix_processor_context();
do_fpu_end(); do_fpu_end();
mtrr_ap_init(); mtrr_ap_init();

View File

@ -14,6 +14,7 @@
#include <asm/page.h> #include <asm/page.h>
#include <asm/pgtable.h> #include <asm/pgtable.h>
#include <asm/mtrr.h> #include <asm/mtrr.h>
#include <asm/xcr.h>
static void fix_processor_context(void); static void fix_processor_context(void);
@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
wrmsrl(MSR_GS_BASE, ctxt->gs_base); wrmsrl(MSR_GS_BASE, ctxt->gs_base);
wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
/*
* restore XCR0 for xsave capable cpu's.
*/
if (cpu_has_xsave)
xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
fix_processor_context(); fix_processor_context();
do_fpu_end(); do_fpu_end();

View File

@ -19,7 +19,9 @@
#include <asm/sigcontext.h> #include <asm/sigcontext.h>
#include <asm/user.h> #include <asm/user.h>
#include <asm/uaccess.h> #include <asm/uaccess.h>
#include <asm/xsave.h>
extern unsigned int sig_xstate_size;
extern void fpu_init(void); extern void fpu_init(void);
extern void mxcsr_feature_mask_init(void); extern void mxcsr_feature_mask_init(void);
extern int init_fpu(struct task_struct *child); extern int init_fpu(struct task_struct *child);
@ -31,12 +33,18 @@ extern user_regset_active_fn fpregs_active, xfpregs_active;
extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
extern struct _fpx_sw_bytes fx_sw_reserved;
#ifdef CONFIG_IA32_EMULATION #ifdef CONFIG_IA32_EMULATION
extern unsigned int sig_xstate_ia32_size;
extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
struct _fpstate_ia32; struct _fpstate_ia32;
extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); struct _xstate_ia32;
extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); extern int save_i387_xstate_ia32(void __user *buf);
extern int restore_i387_xstate_ia32(void __user *buf);
#endif #endif
#define X87_FSW_ES (1 << 7) /* Exception Summary */
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
/* Ignore delayed exceptions from user space */ /* Ignore delayed exceptions from user space */
@ -47,7 +55,7 @@ static inline void tolerant_fwait(void)
_ASM_EXTABLE(1b, 2b)); _ASM_EXTABLE(1b, 2b));
} }
static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
{ {
int err; int err;
@ -67,15 +75,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
return err; return err;
} }
#define X87_FSW_ES (1 << 7) /* Exception Summary */ static inline int restore_fpu_checking(struct task_struct *tsk)
{
if (task_thread_info(tsk)->status & TS_XSAVE)
return xrstor_checking(&tsk->thread.xstate->xsave);
else
return fxrstor_checking(&tsk->thread.xstate->fxsave);
}
/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception /* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed is pending. Clear the x87 state here by setting it to fixed
values. The kernel data segment can be sometimes 0 and sometimes values. The kernel data segment can be sometimes 0 and sometimes
new user value. Both should be ok. new user value. Both should be ok.
Use the PDA as safe address because it should be already in L1. */ Use the PDA as safe address because it should be already in L1. */
static inline void clear_fpu_state(struct i387_fxsave_struct *fx) static inline void clear_fpu_state(struct task_struct *tsk)
{ {
struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
/*
* xsave header may indicate the init state of the FP.
*/
if ((task_thread_info(tsk)->status & TS_XSAVE) &&
!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
return;
if (unlikely(fx->swd & X87_FSW_ES)) if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex"); asm volatile("fnclex");
alternative_input(ASM_NOP8 ASM_NOP2, alternative_input(ASM_NOP8 ASM_NOP2,
@ -84,7 +108,7 @@ static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
X86_FEATURE_FXSAVE_LEAK); X86_FEATURE_FXSAVE_LEAK);
} }
static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
{ {
int err; int err;
@ -108,7 +132,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
return err; return err;
} }
static inline void __save_init_fpu(struct task_struct *tsk) static inline void fxsave(struct task_struct *tsk)
{ {
/* Using "rex64; fxsave %0" is broken because, if the memory operand /* Using "rex64; fxsave %0" is broken because, if the memory operand
uses any extended registers for addressing, a second REX prefix uses any extended registers for addressing, a second REX prefix
@ -133,7 +157,16 @@ static inline void __save_init_fpu(struct task_struct *tsk)
: "=m" (tsk->thread.xstate->fxsave) : "=m" (tsk->thread.xstate->fxsave)
: "cdaSDb" (&tsk->thread.xstate->fxsave)); : "cdaSDb" (&tsk->thread.xstate->fxsave));
#endif #endif
clear_fpu_state(&tsk->thread.xstate->fxsave); }
static inline void __save_init_fpu(struct task_struct *tsk)
{
if (task_thread_info(tsk)->status & TS_XSAVE)
xsave(tsk);
else
fxsave(tsk);
clear_fpu_state(tsk);
task_thread_info(tsk)->status &= ~TS_USEDFPU; task_thread_info(tsk)->status &= ~TS_USEDFPU;
} }
@ -148,6 +181,10 @@ static inline void tolerant_fwait(void)
static inline void restore_fpu(struct task_struct *tsk) static inline void restore_fpu(struct task_struct *tsk)
{ {
if (task_thread_info(tsk)->status & TS_XSAVE) {
xrstor_checking(&tsk->thread.xstate->xsave);
return;
}
/* /*
* The "nop" is needed to make the instructions the same * The "nop" is needed to make the instructions the same
* length. * length.
@ -173,6 +210,27 @@ static inline void restore_fpu(struct task_struct *tsk)
*/ */
static inline void __save_init_fpu(struct task_struct *tsk) static inline void __save_init_fpu(struct task_struct *tsk)
{ {
if (task_thread_info(tsk)->status & TS_XSAVE) {
struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
xsave(tsk);
/*
* xsave header may indicate the init state of the FP.
*/
if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
goto end;
if (unlikely(fx->swd & X87_FSW_ES))
asm volatile("fnclex");
/*
* we can do a simple return here or be paranoid :)
*/
goto clear_state;
}
/* Use more nops than strictly needed in case the compiler /* Use more nops than strictly needed in case the compiler
varies code */ varies code */
alternative_input( alternative_input(
@ -182,6 +240,7 @@ static inline void __save_init_fpu(struct task_struct *tsk)
X86_FEATURE_FXSR, X86_FEATURE_FXSR,
[fx] "m" (tsk->thread.xstate->fxsave), [fx] "m" (tsk->thread.xstate->fxsave),
[fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
clear_state:
/* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
is pending. Clear the x87 state here by setting it to fixed is pending. Clear the x87 state here by setting it to fixed
values. safe_address is a random variable that should be in L1 */ values. safe_address is a random variable that should be in L1 */
@ -191,16 +250,17 @@ static inline void __save_init_fpu(struct task_struct *tsk)
"fildl %[addr]", /* set F?P to defined value */ "fildl %[addr]", /* set F?P to defined value */
X86_FEATURE_FXSAVE_LEAK, X86_FEATURE_FXSAVE_LEAK,
[addr] "m" (safe_address)); [addr] "m" (safe_address));
end:
task_thread_info(tsk)->status &= ~TS_USEDFPU; task_thread_info(tsk)->status &= ~TS_USEDFPU;
} }
#endif /* CONFIG_X86_64 */
/* /*
* Signal frame handlers... * Signal frame handlers...
*/ */
extern int save_i387(struct _fpstate __user *buf); extern int save_i387_xstate(void __user *buf);
extern int restore_i387(struct _fpstate __user *buf); extern int restore_i387_xstate(void __user *buf);
#endif /* CONFIG_X86_64 */
static inline void __unlazy_fpu(struct task_struct *tsk) static inline void __unlazy_fpu(struct task_struct *tsk)
{ {

View File

@ -59,6 +59,7 @@
#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ #define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ #define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
/* /*
* x86-64 Task Priority Register, CR8 * x86-64 Task Priority Register, CR8

View File

@ -326,7 +326,12 @@ struct i387_fxsave_struct {
/* 16*16 bytes for each XMM-reg = 256 bytes: */ /* 16*16 bytes for each XMM-reg = 256 bytes: */
u32 xmm_space[64]; u32 xmm_space[64];
u32 padding[24]; u32 padding[12];
union {
u32 padding1[12];
u32 sw_reserved[12];
};
} __attribute__((aligned(16))); } __attribute__((aligned(16)));
@ -350,10 +355,23 @@ struct i387_soft_struct {
u32 entry_eip; u32 entry_eip;
}; };
struct xsave_hdr_struct {
u64 xstate_bv;
u64 reserved1[2];
u64 reserved2[5];
} __attribute__((packed));
struct xsave_struct {
struct i387_fxsave_struct i387;
struct xsave_hdr_struct xsave_hdr;
/* new processor state extensions will go here */
} __attribute__ ((packed, aligned (64)));
union thread_xstate { union thread_xstate {
struct i387_fsave_struct fsave; struct i387_fsave_struct fsave;
struct i387_fxsave_struct fxsave; struct i387_fxsave_struct fxsave;
struct i387_soft_struct soft; struct i387_soft_struct soft;
struct xsave_struct xsave;
}; };
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64

View File

@ -4,6 +4,40 @@
#include <linux/compiler.h> #include <linux/compiler.h>
#include <asm/types.h> #include <asm/types.h>
#define FP_XSTATE_MAGIC1 0x46505853U
#define FP_XSTATE_MAGIC2 0x46505845U
#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
/*
* bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
* are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
* are used to extended the fpstate pointer in the sigcontext, which now
* includes the extended state information along with fpstate information.
*
* Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
* area and FP_XSTATE_MAGIC2 at the end of memory layout
* (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
* extended state information in the memory layout pointed by the fpstate
* pointer in sigcontext.
*/
struct _fpx_sw_bytes {
__u32 magic1; /* FP_XSTATE_MAGIC1 */
__u32 extended_size; /* total size of the layout referred by
* fpstate pointer in the sigcontext.
*/
__u64 xstate_bv;
/* feature bit mask (including fp/sse/extended
* state) that is present in the memory
* layout.
*/
__u32 xstate_size; /* actual xsave state size, based on the
* features saved in the layout.
* 'extended_size' will be greater than
* 'xstate_size'.
*/
__u32 padding[7]; /* for future use. */
};
#ifdef __i386__ #ifdef __i386__
/* /*
* As documented in the iBCS2 standard.. * As documented in the iBCS2 standard..
@ -53,7 +87,13 @@ struct _fpstate {
unsigned long reserved; unsigned long reserved;
struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
struct _xmmreg _xmm[8]; struct _xmmreg _xmm[8];
unsigned long padding[56]; unsigned long padding1[44];
union {
unsigned long padding2[12];
struct _fpx_sw_bytes sw_reserved; /* represents the extended
* state info */
};
}; };
#define X86_FXSR_MAGIC 0x0000 #define X86_FXSR_MAGIC 0x0000
@ -79,7 +119,15 @@ struct sigcontext {
unsigned long flags; unsigned long flags;
unsigned long sp_at_signal; unsigned long sp_at_signal;
unsigned short ss, __ssh; unsigned short ss, __ssh;
struct _fpstate __user *fpstate;
/*
* fpstate is really (struct _fpstate *) or (struct _xstate *)
* depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
* bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
* of extended memory layout. See comments at the defintion of
* (struct _fpx_sw_bytes)
*/
void __user *fpstate; /* zero when no FPU/extended context */
unsigned long oldmask; unsigned long oldmask;
unsigned long cr2; unsigned long cr2;
}; };
@ -130,7 +178,12 @@ struct _fpstate {
__u32 mxcsr_mask; __u32 mxcsr_mask;
__u32 st_space[32]; /* 8*16 bytes for each FP-reg */ __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
__u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
__u32 reserved2[24]; __u32 reserved2[12];
union {
__u32 reserved3[12];
struct _fpx_sw_bytes sw_reserved; /* represents the extended
* state information */
};
}; };
#ifdef __KERNEL__ #ifdef __KERNEL__
@ -161,7 +214,15 @@ struct sigcontext {
unsigned long trapno; unsigned long trapno;
unsigned long oldmask; unsigned long oldmask;
unsigned long cr2; unsigned long cr2;
struct _fpstate __user *fpstate; /* zero when no FPU context */
/*
* fpstate is really (struct _fpstate *) or (struct _xstate *)
* depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
* bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
* of extended memory layout. See comments at the defintion of
* (struct _fpx_sw_bytes)
*/
void __user *fpstate; /* zero when no FPU/extended context */
unsigned long reserved1[8]; unsigned long reserved1[8];
}; };
#else /* __KERNEL__ */ #else /* __KERNEL__ */
@ -202,4 +263,22 @@ struct sigcontext {
#endif /* !__i386__ */ #endif /* !__i386__ */
struct _xsave_hdr {
__u64 xstate_bv;
__u64 reserved1[2];
__u64 reserved2[5];
};
/*
* Extended state pointed by the fpstate pointer in the sigcontext.
* In addition to the fpstate, information encoded in the xstate_hdr
* indicates the presence of other extended state information
* supported by the processor and OS.
*/
struct _xstate {
struct _fpstate fpstate;
struct _xsave_hdr xstate_hdr;
/* new processor state extensions go here */
};
#endif /* ASM_X86__SIGCONTEXT_H */ #endif /* ASM_X86__SIGCONTEXT_H */

View File

@ -40,7 +40,11 @@ struct _fpstate_ia32 {
__u32 reserved; __u32 reserved;
struct _fpxreg _fxsr_st[8]; struct _fpxreg _fxsr_st[8];
struct _xmmreg _xmm[8]; /* It's actually 16 */ struct _xmmreg _xmm[8]; /* It's actually 16 */
__u32 padding[56]; __u32 padding[44];
union {
__u32 padding2[12];
struct _fpx_sw_bytes sw_reserved;
};
}; };
struct sigcontext_ia32 { struct sigcontext_ia32 {

View File

@ -239,6 +239,7 @@ static inline struct thread_info *stack_thread_info(void)
#define TS_POLLING 0x0004 /* true if in idle loop #define TS_POLLING 0x0004 /* true if in idle loop
and not sleeping */ and not sleeping */
#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ #define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
#define TS_XSAVE 0x0010 /* Use xsave/xrstor */
#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) #define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)

View File

@ -1,6 +1,12 @@
#ifndef ASM_X86__UCONTEXT_H #ifndef ASM_X86__UCONTEXT_H
#define ASM_X86__UCONTEXT_H #define ASM_X86__UCONTEXT_H
#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
* information in the memory layout pointed
* by the fpstate pointer in the ucontext's
* sigcontext struct (uc_mcontext).
*/
struct ucontext { struct ucontext {
unsigned long uc_flags; unsigned long uc_flags;
struct ucontext *uc_link; struct ucontext *uc_link;

49
include/asm-x86/xcr.h Normal file
View File

@ -0,0 +1,49 @@
/* -*- linux-c -*- ------------------------------------------------------- *
*
* Copyright 2008 rPath, Inc. - All Rights Reserved
*
* This file is part of the Linux kernel, and is made available under
* the terms of the GNU General Public License version 2 or (at your
* option) any later version; incorporated herein by reference.
*
* ----------------------------------------------------------------------- */
/*
* asm-x86/xcr.h
*
* Definitions for the eXtended Control Register instructions
*/
#ifndef _ASM_X86_XCR_H
#define _ASM_X86_XCR_H
#define XCR_XFEATURE_ENABLED_MASK 0x00000000
#ifdef __KERNEL__
# ifndef __ASSEMBLY__
#include <linux/types.h>
static inline u64 xgetbv(u32 index)
{
u32 eax, edx;
asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
: "=a" (eax), "=d" (edx)
: "c" (index));
return eax + ((u64)edx << 32);
}
static inline void xsetbv(u32 index, u64 value)
{
u32 eax = value;
u32 edx = value >> 32;
asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
: : "a" (eax), "d" (edx), "c" (index));
}
# endif /* __ASSEMBLY__ */
#endif /* __KERNEL__ */
#endif /* _ASM_X86_XCR_H */

118
include/asm-x86/xsave.h Normal file
View File

@ -0,0 +1,118 @@
#ifndef __ASM_X86_XSAVE_H
#define __ASM_X86_XSAVE_H
#include <linux/types.h>
#include <asm/processor.h>
#include <asm/i387.h>
#define XSTATE_FP 0x1
#define XSTATE_SSE 0x2
#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
#define FXSAVE_SIZE 512
/*
* These are the features that the OS can handle currently.
*/
#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE)
#ifdef CONFIG_X86_64
#define REX_PREFIX "0x48, "
#else
#define REX_PREFIX
#endif
extern unsigned int xstate_size;
extern u64 pcntxt_mask;
extern struct xsave_struct *init_xstate_buf;
extern void xsave_cntxt_init(void);
extern void xsave_init(void);
extern int init_fpu(struct task_struct *child);
extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
void __user *fpstate,
struct _fpx_sw_bytes *sw);
static inline int xrstor_checking(struct xsave_struct *fx)
{
int err;
asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
"2:\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
_ASM_EXTABLE(1b, 3b)
: [err] "=r" (err)
: "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
: "memory");
return err;
}
static inline int xsave_user(struct xsave_struct __user *buf)
{
int err;
__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
"2:\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
_ASM_ALIGN "\n"
_ASM_PTR "1b,3b\n"
".previous"
: [err] "=r" (err)
: "D" (buf), "a" (-1), "d" (-1), "0" (0)
: "memory");
if (unlikely(err) && __clear_user(buf, xstate_size))
err = -EFAULT;
/* No need to clear here because the caller clears USED_MATH */
return err;
}
static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
{
int err;
struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
u32 lmask = mask;
u32 hmask = mask >> 32;
__asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
"2:\n"
".section .fixup,\"ax\"\n"
"3: movl $-1,%[err]\n"
" jmp 2b\n"
".previous\n"
".section __ex_table,\"a\"\n"
_ASM_ALIGN "\n"
_ASM_PTR "1b,3b\n"
".previous"
: [err] "=r" (err)
: "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
: "memory"); /* memory required? */
return err;
}
static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
{
u32 lmask = mask;
u32 hmask = mask >> 32;
asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
: : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
: "memory");
}
static inline void xsave(struct task_struct *tsk)
{
/* This, however, we can work around by forcing the compiler to select
an addressing mode that doesn't require extended registers. */
__asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
: : "D" (&(tsk->thread.xstate->xsave)),
"a" (-1), "d"(-1) : "memory");
}
#endif