x86/i386: Make sure stack-protector segment base is cache aligned
The Intel Optimization Reference Guide says:
In Intel Atom microarchitecture, the address generation unit
assumes that the segment base will be 0 by default. Non-zero
segment base will cause load and store operations to experience
a delay.
- If the segment base isn't aligned to a cache line
boundary, the max throughput of memory operations is
reduced to one [e]very 9 cycles.
[...]
Assembly/Compiler Coding Rule 15. (H impact, ML generality)
For Intel Atom processors, use segments with base set to 0
whenever possible; avoid non-zero segment base address that is
not aligned to cache line boundary at all cost.
We can't avoid having a non-zero base for the stack-protector
segment, but we can make it cache-aligned.
Signed-off-by: Jeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com>
Cc: <stable@kernel.org>
LKML-Reference: <4AA01893.6000507@goop.org>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index c776826..e597ecc 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -403,7 +403,17 @@
extern asmlinkage void ignore_sysret(void);
#else /* X86_64 */
#ifdef CONFIG_CC_STACKPROTECTOR
-DECLARE_PER_CPU(unsigned long, stack_canary);
+/*
+ * Make sure stack canary segment base is cached-aligned:
+ * "For Intel Atom processors, avoid non zero segment base address
+ * that is not aligned to cache line boundary at all cost."
+ * (Optim Ref Manual Assembly/Compiler Coding Rule 15.)
+ */
+struct stack_canary {
+ char __pad[20]; /* canary at %gs:20 */
+ unsigned long canary;
+};
+DECLARE_PER_CPU(struct stack_canary, stack_canary) ____cacheline_aligned;
#endif
#endif /* X86_64 */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
index 44efdff..1575177 100644
--- a/arch/x86/include/asm/stackprotector.h
+++ b/arch/x86/include/asm/stackprotector.h
@@ -78,14 +78,14 @@
#ifdef CONFIG_X86_64
percpu_write(irq_stack_union.stack_canary, canary);
#else
- percpu_write(stack_canary, canary);
+ percpu_write(stack_canary.canary, canary);
#endif
}
static inline void setup_stack_canary_segment(int cpu)
{
#ifdef CONFIG_X86_32
- unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu) - 20;
+ unsigned long canary = (unsigned long)&per_cpu(stack_canary, cpu);
struct desc_struct *gdt_table = get_cpu_gdt_table(cpu);
struct desc_struct desc;
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 75c49c7..f08f973 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -31,7 +31,7 @@
"movl %P[task_canary](%[next]), %%ebx\n\t" \
"movl %%ebx, "__percpu_arg([stack_canary])"\n\t"
#define __switch_canary_oparam \
- , [stack_canary] "=m" (per_cpu_var(stack_canary))
+ , [stack_canary] "=m" (per_cpu_var(stack_canary.canary))
#define __switch_canary_iparam \
, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
#else /* CC_STACKPROTECTOR */