percpu: align percpu readmostly subsection to cacheline
Currently percpu readmostly subsection may share cachelines with other
percpu subsections which may result in unnecessary cacheline bounce
and performance degradation.
This patch adds @cacheline parameter to PERCPU() and PERCPU_VADDR()
linker macros, makes each arch linker scripts specify its cacheline
size and use it to align percpu subsections.
This is based on Shaohua's x86 only patch.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Shaohua Li <shaohua.li@intel.com>
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index bf47007..cef446f 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -230,7 +230,7 @@
* output PHDR, so the next output section - .init.text - should
* start another segment - init.
*/
- PERCPU_VADDR(0, :percpu)
+ PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)
#endif
INIT_TEXT_SECTION(PAGE_SIZE)
@@ -305,7 +305,7 @@
}
#if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)
- PERCPU(THREAD_SIZE)
+ PERCPU(INTERNODE_CACHE_BYTES, THREAD_SIZE)
#endif
. = ALIGN(PAGE_SIZE);