From dc1e35c6e95e8923cf1d3510438b63c600fee1e2 Mon Sep 17 00:00:00 2001
From: Suresh Siddha <suresh.b.siddha@intel.com>
Date: Tue, 29 Jul 2008 10:29:19 -0700
Subject: [PATCH] x86, xsave: enable xsave/xrstor on cpus with xsave support

Enables xsave/xrstor by turning on cr4.osxsave on cpu's which have
the xsave support. For now, features that OS supports/enabled are
FP and SSE.

Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Signed-off-by: H. Peter Anvin <hpa@zytor.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/Makefile          |  2 +-
 arch/x86/kernel/cpu/common.c      |  8 +++
 arch/x86/kernel/i387.c            | 12 +++++
 arch/x86/kernel/traps_32.c        |  1 -
 arch/x86/kernel/traps_64.c        |  4 --
 arch/x86/kernel/xsave.c           | 87 +++++++++++++++++++++++++++++++
 include/asm-x86/i387.h            |  1 +
 include/asm-x86/processor-flags.h |  1 +
 include/asm-x86/processor.h       | 12 +++++
 include/asm-x86/xsave.h           | 26 +++++++++
 10 files changed, 148 insertions(+), 6 deletions(-)
 create mode 100644 arch/x86/kernel/xsave.c
 create mode 100644 include/asm-x86/xsave.h

diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a07ec14f33122..d6ea91abaebc1 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y			+= tsc.o io_delay.o rtc.o
 
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline.o
 obj-y				+= process.o
-obj-y				+= i387.o
+obj-y				+= i387.o xsave.o
 obj-y				+= ptrace.o
 obj-y				+= ds.o
 obj-$(CONFIG_X86_32)		+= tls.o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 80ab20d4fa399..fabbcb7020fb6 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -712,6 +712,14 @@ void __cpuinit cpu_init(void)
 	current_thread_info()->status = 0;
 	clear_used_math();
 	mxcsr_feature_mask_init();
+
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+
+	xsave_init();
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb828..e22a9a9dce8a8 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -61,6 +61,11 @@ void __init init_thread_xstate(void)
 		return;
 	}
 
+	if (cpu_has_xsave) {
+		xsave_cntxt_init();
+		return;
+	}
+
 	if (cpu_has_fxsr)
 		xstate_size = sizeof(struct i387_fxsave_struct);
 #ifdef CONFIG_X86_32
@@ -83,6 +88,13 @@ void __cpuinit fpu_init(void)
 
 	write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
 
+	/*
+	 * Boot processor to setup the FP and extended state context info.
+	 */
+	if (!smp_processor_id())
+		init_thread_xstate();
+	xsave_init();
+
 	mxcsr_feature_mask_init();
 	/* clean state in init */
 	current_thread_info()->status = 0;
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a15..da5a5964fccb2 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
 
 	set_bit(SYSCALL_VECTOR, used_vectors);
 
-	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 513caaca7115e..3580a7938a2e8 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1172,10 +1172,6 @@ void __init trap_init(void)
 #ifdef CONFIG_IA32_EMULATION
 	set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
 #endif
-	/*
-	 * initialize the per thread extended state:
-	 */
-	init_thread_xstate();
 	/*
 	 * Should be a barrier for any external CPU state:
 	 */
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 0000000000000..c68b7c4ca249d
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,87 @@
+/*
+ * xsave/xrstor support.
+ *
+ * Author: Suresh Siddha <suresh.b.siddha@intel.com>
+ */
+#include <linux/bootmem.h>
+#include <linux/compat.h>
+#include <asm/i387.h>
+
+/*
+ * Supported feature mask by the CPU and the kernel.
+ */
+unsigned int pcntxt_hmask, pcntxt_lmask;
+
+/*
+ * Represents init state for the supported extended state.
+ */
+struct xsave_struct *init_xstate_buf;
+
+/*
+ * Enable the extended processor state save/restore feature
+ */
+void __cpuinit xsave_init(void)
+{
+	if (!cpu_has_xsave)
+		return;
+
+	set_in_cr4(X86_CR4_OSXSAVE);
+
+	/*
+	 * Enable all the features that the HW is capable of
+	 * and the Linux kernel is aware of.
+	 *
+	 * xsetbv();
+	 */
+	asm volatile(".byte 0x0f,0x01,0xd1" : : "c" (0),
+		     "a" (pcntxt_lmask), "d" (pcntxt_hmask));
+}
+
+/*
+ * setup the xstate image representing the init state
+ */
+void setup_xstate_init(void)
+{
+	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
+}
+
+/*
+ * Enable and initialize the xsave feature.
+ */
+void __init xsave_cntxt_init(void)
+{
+	unsigned int eax, ebx, ecx, edx;
+
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+
+	pcntxt_lmask = eax;
+	pcntxt_hmask = edx;
+
+	if ((pcntxt_lmask & XSTATE_FPSSE) != XSTATE_FPSSE) {
+		printk(KERN_ERR "FP/SSE not shown under xsave features %x\n",
+		       pcntxt_lmask);
+		BUG();
+	}
+
+	/*
+	 * for now OS knows only about FP/SSE
+	 */
+	pcntxt_lmask = pcntxt_lmask & XCNTXT_LMASK;
+	pcntxt_hmask = pcntxt_hmask & XCNTXT_HMASK;
+
+	xsave_init();
+
+	/*
+	 * Recompute the context size for enabled features
+	 */
+	cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
+
+	xstate_size = ebx;
+
+	setup_xstate_init();
+
+	printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%Lx, "
+	       "cntxt size 0x%x\n",
+	       (pcntxt_lmask | ((u64) pcntxt_hmask << 32)), xstate_size);
+}
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 3958de6aad0ee..6a66478966703 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -18,6 +18,7 @@
 #include <asm/sigcontext.h>
 #include <asm/user.h>
 #include <asm/uaccess.h>
+#include <asm/xsave.h>
 
 extern void fpu_init(void);
 extern void mxcsr_feature_mask_init(void);
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 5dd79774f693c..dc5f0712f9fa4 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -59,6 +59,7 @@
 #define X86_CR4_OSFXSR	0x00000200 /* enable fast FPU save and restore */
 #define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
 #define X86_CR4_VMXE	0x00002000 /* enable VMX virtualization */
+#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
 
 /*
  * x86-64 Task Priority Register, CR8
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index d60b4d81febef..d7c0221c02780 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -346,6 +346,18 @@ struct i387_soft_struct {
 	u32			entry_eip;
 };
 
+struct xsave_hdr_struct {
+	u64 xstate_bv;
+	u64 reserved1[2];
+	u64 reserved2[5];
+} __attribute__((packed));
+
+struct xsave_struct {
+	struct i387_fxsave_struct i387;
+	struct xsave_hdr_struct xsave_hdr;
+	/* new processor state extensions will go here */
+} __attribute__ ((packed, aligned (64)));
+
 union thread_xstate {
 	struct i387_fsave_struct	fsave;
 	struct i387_fxsave_struct	fxsave;
diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h
new file mode 100644
index 0000000000000..6d70e62c6bdc1
--- /dev/null
+++ b/include/asm-x86/xsave.h
@@ -0,0 +1,26 @@
+#ifndef __ASM_X86_XSAVE_H
+#define __ASM_X86_XSAVE_H
+
+#include <asm/processor.h>
+#include <asm/i387.h>
+
+#define XSTATE_FP	0x1
+#define XSTATE_SSE	0x2
+
+#define XSTATE_FPSSE	(XSTATE_FP | XSTATE_SSE)
+
+#define FXSAVE_SIZE	512
+
+/*
+ * These are the features that the OS can handle currently.
+ */
+#define XCNTXT_LMASK	(XSTATE_FP | XSTATE_SSE)
+#define XCNTXT_HMASK	0x0
+
+extern unsigned int xstate_size, pcntxt_hmask, pcntxt_lmask;
+extern struct xsave_struct *init_xstate_buf;
+
+extern void xsave_cntxt_init(void);
+extern void xsave_init(void);
+
+#endif
-- 
GitLab