blob: 4db35544de738f29f33051c444ad40b998c0e3b1 [file] [log] [blame]
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -08001#include <linux/module.h>
Al Virof6a57032006-10-18 01:47:25 -04002#include <linux/sched.h>
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +02003#include <linux/mutex.h>
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -08004#include <linux/list.h>
Jan Beulich8b5a10f2009-08-19 08:40:48 +01005#include <linux/stringify.h>
Andi Kleen19d36cc2007-07-22 11:12:31 +02006#include <linux/kprobes.h>
7#include <linux/mm.h>
8#include <linux/vmalloc.h>
Masami Hiramatsu3945dab2009-03-06 10:37:22 -05009#include <linux/memory.h>
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -050010#include <linux/stop_machine.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090011#include <linux/slab.h>
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -080012#include <asm/alternative.h>
13#include <asm/sections.h>
Andi Kleen19d36cc2007-07-22 11:12:31 +020014#include <asm/pgtable.h>
Andi Kleen8f4e9562007-07-22 11:12:32 +020015#include <asm/mce.h>
16#include <asm/nmi.h>
Dave Jonesb0979762007-10-14 22:57:45 +020017#include <asm/vsyscall.h>
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -050018#include <asm/cacheflush.h>
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -050019#include <asm/tlbflush.h>
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -050020#include <asm/io.h>
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -050021#include <asm/fixmap.h>
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -080022
/* Size in bytes of the on-stack buffers in which a patch is assembled. */
#define MAX_PATCH_LEN (255 - 1)
24
#ifdef CONFIG_HOTPLUG_CPU
/* Non-zero selects the patch-once-at-boot mode for SMP lock prefixes. */
static int smp_alt_once;

/* "smp-alt-boot" kernel parameter: request patch-once-at-boot mode. */
static int __init bootonly(char *str)
{
	smp_alt_once = 1;
	return 1;
}
__setup("smp-alt-boot", bootonly);
#else
/* Without CPU hotplug the patch-once mode is always selected. */
#define smp_alt_once 1
#endif
37
Jan Beulich8b5a10f2009-08-19 08:40:48 +010038static int __initdata_or_module debug_alternative;
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +020039
Gerd Hoffmannd167a512006-06-26 13:56:16 +020040static int __init debug_alt(char *str)
41{
42 debug_alternative = 1;
43 return 1;
44}
Gerd Hoffmannd167a512006-06-26 13:56:16 +020045__setup("debug-alternative", debug_alt);
46
Jan Beulich09488162007-07-21 17:10:25 +020047static int noreplace_smp;
48
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +020049static int __init setup_noreplace_smp(char *str)
50{
51 noreplace_smp = 1;
52 return 1;
53}
54__setup("noreplace-smp", setup_noreplace_smp);
55
#ifdef CONFIG_PARAVIRT
/* Non-zero makes apply_paravirt() return without patching anything. */
static int __initdata_or_module noreplace_paravirt = 0;

/* "noreplace-paravirt" kernel parameter: skip paravirt patching. */
static int __init setup_noreplace_paravirt(char *str)
{
	noreplace_paravirt = 1;
	return 1;
}
__setup("noreplace-paravirt", setup_noreplace_paravirt);
#endif
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +020066
/*
 * Debug printk, emitted only when debug_alternative is non-zero.
 *
 * The body is wrapped in do { } while (0) so that the macro behaves as a
 * single statement: a bare "if" here would capture the "else" of an
 * enclosing if/else written without braces.  "##args" lets the macro be
 * used with a format string that takes no arguments.
 */
#define DPRINTK(fmt, args...)					\
	do {							\
		if (debug_alternative)				\
			printk(KERN_DEBUG fmt, ##args);		\
	} while (0)
69
#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
/*
 * The NOP sequences are only available as inline-assembly strings in the
 * include files, so the byte blob is emitted with a top-level asm statement
 * and then referenced from C through an extern array.
 */
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
	GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
	GENERIC_NOP7 GENERIC_NOP8
	"\t.previous");
extern const unsigned char intelnops[];
/*
 * intel_nops[n] points at the n-byte NOP inside the blob; its offset is the
 * summed length of all shorter NOPs.  Slot 0 is unused.
 */
static const unsigned char *const __initconst_or_module
intel_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = intelnops,
	[2] = intelnops + 1,
	[3] = intelnops + 3,
	[4] = intelnops + 6,
	[5] = intelnops + 10,
	[6] = intelnops + 15,
	[7] = intelnops + 21,
	[8] = intelnops + 28,
};
#endif
92
#ifdef K8_NOP1
/* K8 NOP sequences of 1..8 bytes, emitted back to back as one blob. */
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
	K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
	K8_NOP7 K8_NOP8
	"\t.previous");
extern const unsigned char k8nops[];
/*
 * k8_nops[n] points at the n-byte NOP inside the blob; its offset is the
 * summed length of all shorter NOPs.  Slot 0 is unused.
 */
static const unsigned char *const __initconst_or_module
k8_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k8nops,
	[2] = k8nops + 1,
	[3] = k8nops + 3,
	[4] = k8nops + 6,
	[5] = k8nops + 10,
	[6] = k8nops + 15,
	[7] = k8nops + 21,
	[8] = k8nops + 28,
};
#endif
112
#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
/* K7 NOP sequences of 1..8 bytes, emitted back to back as one blob. */
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
	K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
	K7_NOP7 K7_NOP8
	"\t.previous");
extern const unsigned char k7nops[];
/*
 * k7_nops[n] points at the n-byte NOP inside the blob; its offset is the
 * summed length of all shorter NOPs.  Slot 0 is unused.
 */
static const unsigned char *const __initconst_or_module
k7_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = k7nops,
	[2] = k7nops + 1,
	[3] = k7nops + 3,
	[4] = k7nops + 6,
	[5] = k7nops + 10,
	[6] = k7nops + 15,
	[7] = k7nops + 21,
	[8] = k7nops + 28,
};
#endif
132
#ifdef P6_NOP1
/* P6 NOP sequences of 1..8 bytes, emitted back to back as one blob. */
asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
	P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
	P6_NOP7 P6_NOP8
	"\t.previous");
extern const unsigned char p6nops[];
/*
 * p6_nops[n] points at the n-byte NOP inside the blob; its offset is the
 * summed length of all shorter NOPs.  Slot 0 is unused.
 */
static const unsigned char *const __initconst_or_module
p6_nops[ASM_NOP_MAX+1] = {
	[0] = NULL,
	[1] = p6nops,
	[2] = p6nops + 1,
	[3] = p6nops + 3,
	[4] = p6nops + 6,
	[5] = p6nops + 10,
	[6] = p6nops + 15,
	[7] = p6nops + 21,
	[8] = p6nops + 28,
};
#endif
152
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200153#ifdef CONFIG_X86_64
154
155extern char __vsyscall_0;
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100156static const unsigned char *const *__init_or_module find_nop_table(void)
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200157{
H. Peter Anvinf31d7312008-08-18 17:50:33 -0700158 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
159 boot_cpu_has(X86_FEATURE_NOPL))
160 return p6_nops;
161 else
162 return k8_nops;
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200163}
164
165#else /* CONFIG_X86_64 */
166
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100167static const unsigned char *const *__init_or_module find_nop_table(void)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800168{
H. Peter Anvinf31d7312008-08-18 17:50:33 -0700169 if (boot_cpu_has(X86_FEATURE_K8))
170 return k8_nops;
171 else if (boot_cpu_has(X86_FEATURE_K7))
172 return k7_nops;
173 else if (boot_cpu_has(X86_FEATURE_NOPL))
174 return p6_nops;
175 else
176 return intel_nops;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800177}
178
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200179#endif /* CONFIG_X86_64 */
180
Andi Kleenab144f52007-08-10 22:31:03 +0200181/* Use this to add nops to a buffer, then text_poke the whole buffer. */
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100182static void __init_or_module add_nops(void *insns, unsigned int len)
Rusty Russell139ec7c2006-12-07 02:14:08 +0100183{
Jan Beulich121d7bf2007-10-17 18:04:37 +0200184 const unsigned char *const *noptable = find_nop_table();
Rusty Russell139ec7c2006-12-07 02:14:08 +0100185
186 while (len > 0) {
187 unsigned int noplen = len;
188 if (noplen > ASM_NOP_MAX)
189 noplen = ASM_NOP_MAX;
Andi Kleenab144f52007-08-10 22:31:03 +0200190 memcpy(insns, noptable[noplen], noplen);
Rusty Russell139ec7c2006-12-07 02:14:08 +0100191 insns += noplen;
192 len -= noplen;
193 }
194}
195
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200196extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
Jan Beulich5967ed82010-04-21 16:08:14 +0100197extern s32 __smp_locks[], __smp_locks_end[];
Jason Baronfa6f2cc2010-09-17 11:08:56 -0400198void *text_poke_early(void *addr, const void *opcode, size_t len);
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200199
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800200/* Replace instructions with better alternatives for this CPU type.
201 This runs before SMP is initialized to avoid SMP problems with
202 self modifying code. This implies that assymetric systems where
203 APs have less capabilities than the boot processor are not handled.
204 Tough. Make sure you disable such features by hand. */
205
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100206void __init_or_module apply_alternatives(struct alt_instr *start,
207 struct alt_instr *end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800208{
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800209 struct alt_instr *a;
Jan Beulich1b1d9252009-12-18 16:12:56 +0000210 u8 insnbuf[MAX_PATCH_LEN];
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800211
Harvey Harrison77bf90e2008-03-03 11:37:23 -0800212 DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800213 for (a = start; a < end; a++) {
Andi Kleenab144f52007-08-10 22:31:03 +0200214 u8 *instr = a->instr;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800215 BUG_ON(a->replacementlen > a->instrlen);
Andi Kleenab144f52007-08-10 22:31:03 +0200216 BUG_ON(a->instrlen > sizeof(insnbuf));
H. Peter Anvin3b770a22010-07-13 14:57:50 -0700217 BUG_ON(a->cpuid >= NCAPINTS*32);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800218 if (!boot_cpu_has(a->cpuid))
219 continue;
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200220#ifdef CONFIG_X86_64
221 /* vsyscall code is not mapped yet. resolve it manually. */
222 if (instr >= (u8 *)VSYSCALL_START && instr < (u8*)VSYSCALL_END) {
223 instr = __va(instr - (u8*)VSYSCALL_START + (u8*)__pa_symbol(&__vsyscall_0));
224 DPRINTK("%s: vsyscall fixup: %p => %p\n",
Harvey Harrison77bf90e2008-03-03 11:37:23 -0800225 __func__, a->instr, instr);
Gerd Hoffmannd167a512006-06-26 13:56:16 +0200226 }
227#endif
Andi Kleenab144f52007-08-10 22:31:03 +0200228 memcpy(insnbuf, a->replacement, a->replacementlen);
Jan Beulich1b1d9252009-12-18 16:12:56 +0000229 if (*insnbuf == 0xe8 && a->replacementlen == 5)
230 *(s32 *)(insnbuf + 1) += a->replacement - a->instr;
Andi Kleenab144f52007-08-10 22:31:03 +0200231 add_nops(insnbuf + a->replacementlen,
232 a->instrlen - a->replacementlen);
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500233 text_poke_early(instr, insnbuf, a->instrlen);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800234 }
235}
236
Gerd Hoffmann8ec4d412006-07-01 04:36:18 -0700237#ifdef CONFIG_SMP
238
Jan Beulich5967ed82010-04-21 16:08:14 +0100239static void alternatives_smp_lock(const s32 *start, const s32 *end,
240 u8 *text, u8 *text_end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800241{
Jan Beulich5967ed82010-04-21 16:08:14 +0100242 const s32 *poff;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800243
Masami Hiramatsu3945dab2009-03-06 10:37:22 -0500244 mutex_lock(&text_mutex);
Jan Beulich5967ed82010-04-21 16:08:14 +0100245 for (poff = start; poff < end; poff++) {
246 u8 *ptr = (u8 *)poff + *poff;
247
248 if (!*poff || ptr < text || ptr >= text_end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800249 continue;
Mathieu Desnoyersf88f07e2008-08-14 16:58:15 -0400250 /* turn DS segment override prefix into lock prefix */
H. Peter Anvind9c58412010-04-29 16:53:17 -0700251 if (*ptr == 0x3e)
252 text_poke(ptr, ((unsigned char []){0xf0}), 1);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800253 };
Masami Hiramatsu3945dab2009-03-06 10:37:22 -0500254 mutex_unlock(&text_mutex);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800255}
256
Jan Beulich5967ed82010-04-21 16:08:14 +0100257static void alternatives_smp_unlock(const s32 *start, const s32 *end,
258 u8 *text, u8 *text_end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800259{
Jan Beulich5967ed82010-04-21 16:08:14 +0100260 const s32 *poff;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800261
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +0200262 if (noreplace_smp)
263 return;
264
Masami Hiramatsu3945dab2009-03-06 10:37:22 -0500265 mutex_lock(&text_mutex);
Jan Beulich5967ed82010-04-21 16:08:14 +0100266 for (poff = start; poff < end; poff++) {
267 u8 *ptr = (u8 *)poff + *poff;
268
269 if (!*poff || ptr < text || ptr >= text_end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800270 continue;
Mathieu Desnoyersf88f07e2008-08-14 16:58:15 -0400271 /* turn lock prefix into DS segment override prefix */
H. Peter Anvind9c58412010-04-29 16:53:17 -0700272 if (*ptr == 0xf0)
273 text_poke(ptr, ((unsigned char []){0x3E}), 1);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800274 };
Masami Hiramatsu3945dab2009-03-06 10:37:22 -0500275 mutex_unlock(&text_mutex);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800276}
277
278struct smp_alt_module {
279 /* what is this ??? */
280 struct module *mod;
281 char *name;
282
283 /* ptrs to lock prefixes */
Jan Beulich5967ed82010-04-21 16:08:14 +0100284 const s32 *locks;
285 const s32 *locks_end;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800286
287 /* .text segment, needed to avoid patching init code ;) */
288 u8 *text;
289 u8 *text_end;
290
291 struct list_head next;
292};
293static LIST_HEAD(smp_alt_modules);
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200294static DEFINE_MUTEX(smp_alt);
Andi Kleenca74a6f2008-01-30 13:33:17 +0100295static int smp_mode = 1; /* protected by smp_alt */
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800296
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100297void __init_or_module alternatives_smp_module_add(struct module *mod,
298 char *name,
299 void *locks, void *locks_end,
300 void *text, void *text_end)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800301{
302 struct smp_alt_module *smp;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800303
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +0200304 if (noreplace_smp)
305 return;
306
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800307 if (smp_alt_once) {
308 if (boot_cpu_has(X86_FEATURE_UP))
309 alternatives_smp_unlock(locks, locks_end,
310 text, text_end);
311 return;
312 }
313
314 smp = kzalloc(sizeof(*smp), GFP_KERNEL);
315 if (NULL == smp)
316 return; /* we'll run the (safe but slow) SMP code then ... */
317
318 smp->mod = mod;
319 smp->name = name;
320 smp->locks = locks;
321 smp->locks_end = locks_end;
322 smp->text = text;
323 smp->text_end = text_end;
324 DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
Harvey Harrison77bf90e2008-03-03 11:37:23 -0800325 __func__, smp->locks, smp->locks_end,
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800326 smp->text, smp->text_end, smp->name);
327
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200328 mutex_lock(&smp_alt);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800329 list_add_tail(&smp->next, &smp_alt_modules);
330 if (boot_cpu_has(X86_FEATURE_UP))
331 alternatives_smp_unlock(smp->locks, smp->locks_end,
332 smp->text, smp->text_end);
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200333 mutex_unlock(&smp_alt);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800334}
335
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100336void __init_or_module alternatives_smp_module_del(struct module *mod)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800337{
338 struct smp_alt_module *item;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800339
Jeremy Fitzhardingeb7fb4af2007-05-02 19:27:13 +0200340 if (smp_alt_once || noreplace_smp)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800341 return;
342
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200343 mutex_lock(&smp_alt);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800344 list_for_each_entry(item, &smp_alt_modules, next) {
345 if (mod != item->mod)
346 continue;
347 list_del(&item->next);
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200348 mutex_unlock(&smp_alt);
Harvey Harrison77bf90e2008-03-03 11:37:23 -0800349 DPRINTK("%s: %s\n", __func__, item->name);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800350 kfree(item);
351 return;
352 }
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200353 mutex_unlock(&smp_alt);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800354}
355
Suresh Siddha3fb82d52010-11-23 16:11:40 -0800356bool skip_smp_alternatives;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800357void alternatives_smp_switch(int smp)
358{
359 struct smp_alt_module *mod;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800360
Ingo Molnar3047e992006-07-03 00:24:57 -0700361#ifdef CONFIG_LOCKDEP
362 /*
Ingo Molnar17abecf2008-01-30 13:33:24 +0100363 * Older binutils section handling bug prevented
364 * alternatives-replacement from working reliably.
365 *
366 * If this still occurs then you should see a hang
367 * or crash shortly after this line:
Ingo Molnar3047e992006-07-03 00:24:57 -0700368 */
Ingo Molnar17abecf2008-01-30 13:33:24 +0100369 printk("lockdep: fixing up alternatives.\n");
Ingo Molnar3047e992006-07-03 00:24:57 -0700370#endif
371
Suresh Siddha3fb82d52010-11-23 16:11:40 -0800372 if (noreplace_smp || smp_alt_once || skip_smp_alternatives)
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800373 return;
374 BUG_ON(!smp && (num_online_cpus() > 1));
375
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200376 mutex_lock(&smp_alt);
Andi Kleenca74a6f2008-01-30 13:33:17 +0100377
378 /*
379 * Avoid unnecessary switches because it forces JIT based VMs to
380 * throw away all cached translations, which can be quite costly.
381 */
382 if (smp == smp_mode) {
383 /* nothing */
384 } else if (smp) {
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800385 printk(KERN_INFO "SMP alternatives: switching to SMP code\n");
Jeremy Fitzhardinge53756d32008-01-30 13:30:55 +0100386 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
387 clear_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800388 list_for_each_entry(mod, &smp_alt_modules, next)
389 alternatives_smp_lock(mod->locks, mod->locks_end,
390 mod->text, mod->text_end);
391 } else {
392 printk(KERN_INFO "SMP alternatives: switching to UP code\n");
Jeremy Fitzhardinge53756d32008-01-30 13:30:55 +0100393 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
394 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800395 list_for_each_entry(mod, &smp_alt_modules, next)
396 alternatives_smp_unlock(mod->locks, mod->locks_end,
397 mod->text, mod->text_end);
398 }
Andi Kleenca74a6f2008-01-30 13:33:17 +0100399 smp_mode = smp;
Pekka Paalanen2f1dafe2008-05-12 21:21:01 +0200400 mutex_unlock(&smp_alt);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800401}
402
Masami Hiramatsu2cfa1972010-02-02 16:49:11 -0500403/* Return 1 if the address range is reserved for smp-alternatives */
404int alternatives_text_reserved(void *start, void *end)
405{
406 struct smp_alt_module *mod;
Jan Beulich5967ed82010-04-21 16:08:14 +0100407 const s32 *poff;
Masami Hiramatsu076dc4a62010-02-05 12:16:47 -0500408 u8 *text_start = start;
409 u8 *text_end = end;
Masami Hiramatsu2cfa1972010-02-02 16:49:11 -0500410
411 list_for_each_entry(mod, &smp_alt_modules, next) {
Masami Hiramatsu076dc4a62010-02-05 12:16:47 -0500412 if (mod->text > text_end || mod->text_end < text_start)
Masami Hiramatsu2cfa1972010-02-02 16:49:11 -0500413 continue;
Jan Beulich5967ed82010-04-21 16:08:14 +0100414 for (poff = mod->locks; poff < mod->locks_end; poff++) {
415 const u8 *ptr = (const u8 *)poff + *poff;
416
417 if (text_start <= ptr && text_end > ptr)
Masami Hiramatsu2cfa1972010-02-02 16:49:11 -0500418 return 1;
Jan Beulich5967ed82010-04-21 16:08:14 +0100419 }
Masami Hiramatsu2cfa1972010-02-02 16:49:11 -0500420 }
421
422 return 0;
423}
Gerd Hoffmann8ec4d412006-07-01 04:36:18 -0700424#endif
425
#ifdef CONFIG_PARAVIRT
/*
 * Patch every paravirt site in [start, end).  The original bytes of a site
 * are copied into a scratch buffer, pv_init_ops.patch() rewrites the buffer
 * and reports how many bytes it used, the remainder is filled with NOPs and
 * the result is written back over the site.  Returns immediately when
 * noreplace_paravirt is set.
 */
void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
				     struct paravirt_patch_site *end)
{
	struct paravirt_patch_site *site;
	char patch[MAX_PATCH_LEN];

	if (noreplace_paravirt)
		return;

	for (site = start; site < end; site++) {
		unsigned int used;

		BUG_ON(site->len > MAX_PATCH_LEN);
		/* prep the buffer with the original instructions */
		memcpy(patch, site->instr, site->len);
		used = pv_init_ops.patch(site->instrtype, site->clobbers, patch,
					 (unsigned long)site->instr, site->len);

		BUG_ON(used > site->len);

		/* Pad the rest with nops */
		add_nops(patch + used, site->len - used);
		text_poke_early(site->instr, patch, site->len);
	}
}
extern struct paravirt_patch_site __start_parainstructions[];
extern struct paravirt_patch_site __stop_parainstructions[];
#endif	/* CONFIG_PARAVIRT */
455
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800456void __init alternative_instructions(void)
457{
Andi Kleen8f4e9562007-07-22 11:12:32 +0200458 /* The patching is not fully atomic, so try to avoid local interruptions
459 that might execute the to be patched code.
460 Other CPUs are not running. */
461 stop_nmi();
Andi Kleen123aa762009-02-12 13:39:27 +0100462
463 /*
464 * Don't stop machine check exceptions while patching.
465 * MCEs only happen when something got corrupted and in this
466 * case we must do something about the corruption.
467 * Ignoring it is worse than a unlikely patching race.
468 * Also machine checks tend to be broadcast and if one CPU
469 * goes into machine check the others follow quickly, so we don't
470 * expect a machine check to cause undue problems during to code
471 * patching.
472 */
Andi Kleen8f4e9562007-07-22 11:12:32 +0200473
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800474 apply_alternatives(__alt_instructions, __alt_instructions_end);
475
476 /* switch to patch-once-at-boottime-only mode and free the
477 * tables in case we know the number of CPUs will never ever
478 * change */
479#ifdef CONFIG_HOTPLUG_CPU
480 if (num_possible_cpus() < 2)
481 smp_alt_once = 1;
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800482#endif
483
Gerd Hoffmann8ec4d412006-07-01 04:36:18 -0700484#ifdef CONFIG_SMP
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800485 if (smp_alt_once) {
486 if (1 == num_possible_cpus()) {
487 printk(KERN_INFO "SMP alternatives: switching to UP code\n");
Jeremy Fitzhardinge53756d32008-01-30 13:30:55 +0100488 set_cpu_cap(&boot_cpu_data, X86_FEATURE_UP);
489 set_cpu_cap(&cpu_data(0), X86_FEATURE_UP);
490
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800491 alternatives_smp_unlock(__smp_locks, __smp_locks_end,
492 _text, _etext);
493 }
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800494 } else {
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800495 alternatives_smp_module_add(NULL, "core kernel",
496 __smp_locks, __smp_locks_end,
497 _text, _etext);
Andi Kleenca74a6f2008-01-30 13:33:17 +0100498
499 /* Only switch to UP mode if we don't immediately boot others */
Thomas Gleixner649c6652008-10-05 16:52:24 +0200500 if (num_present_cpus() == 1 || setup_max_cpus <= 1)
Andi Kleenca74a6f2008-01-30 13:33:17 +0100501 alternatives_smp_switch(0);
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800502 }
Gerd Hoffmann8ec4d412006-07-01 04:36:18 -0700503#endif
Jeremy Fitzhardinge441d40d2007-05-02 19:27:16 +0200504 apply_paravirt(__parainstructions, __parainstructions_end);
Andi Kleen8f4e9562007-07-22 11:12:32 +0200505
Fengguang Wuf68fd5f2007-10-17 18:04:34 +0200506 if (smp_alt_once)
507 free_init_pages("SMP alternatives",
508 (unsigned long)__smp_locks,
509 (unsigned long)__smp_locks_end);
510
Andi Kleen8f4e9562007-07-22 11:12:32 +0200511 restart_nmi();
Gerd Hoffmann9a0b5812006-03-23 02:59:32 -0800512}
Andi Kleen19d36cc2007-07-22 11:12:31 +0200513
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500514/**
515 * text_poke_early - Update instructions on a live kernel at boot time
516 * @addr: address to modify
517 * @opcode: source of the copy
518 * @len: length to copy
519 *
Andi Kleen19d36cc2007-07-22 11:12:31 +0200520 * When you use this code to patch more than one byte of an instruction
521 * you need to make sure that other CPUs cannot execute this code in parallel.
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500522 * Also no thread must be currently preempted in the middle of these
523 * instructions. And on the local CPU you need to be protected again NMI or MCE
524 * handlers seeing an inconsistent instruction while you patch.
Andi Kleen19d36cc2007-07-22 11:12:31 +0200525 */
Jason Baronfa6f2cc2010-09-17 11:08:56 -0400526void *__init_or_module text_poke_early(void *addr, const void *opcode,
Jan Beulich8b5a10f2009-08-19 08:40:48 +0100527 size_t len)
Andi Kleen19d36cc2007-07-22 11:12:31 +0200528{
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500529 unsigned long flags;
530 local_irq_save(flags);
Andi Kleen19d36cc2007-07-22 11:12:31 +0200531 memcpy(addr, opcode, len);
532 sync_core();
Ben Hutchings5367b682009-09-10 02:53:50 +0100533 local_irq_restore(flags);
Andi Kleena534b672007-09-06 16:59:52 +0200534 /* Could also do a CLFLUSH here to speed up CPU recovery; but
535 that causes hangs on some VIA CPUs. */
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500536 return addr;
537}
538
539/**
540 * text_poke - Update instructions on a live kernel
541 * @addr: address to modify
542 * @opcode: source of the copy
543 * @len: length to copy
544 *
545 * Only atomic text poke/set should be allowed when not doing early patching.
546 * It means the size must be writable atomically and the address must be aligned
547 * in a way that permits an atomic write. It also makes sure we fit on a single
548 * page.
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -0500549 *
550 * Note: Must be called under text_mutex.
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500551 */
552void *__kprobes text_poke(void *addr, const void *opcode, size_t len)
553{
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -0500554 unsigned long flags;
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500555 char *vaddr;
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400556 struct page *pages[2];
557 int i;
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500558
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400559 if (!core_kernel_text((unsigned long)addr)) {
560 pages[0] = vmalloc_to_page(addr);
561 pages[1] = vmalloc_to_page(addr + PAGE_SIZE);
Mathieu Desnoyers15a601e2008-03-12 11:54:16 -0400562 } else {
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400563 pages[0] = virt_to_page(addr);
Ingo Molnar00c6b2d2008-04-25 17:07:03 +0200564 WARN_ON(!PageReserved(pages[0]));
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400565 pages[1] = virt_to_page(addr + PAGE_SIZE);
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500566 }
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400567 BUG_ON(!pages[0]);
Masami Hiramatsu7cf49422009-03-09 12:40:40 -0400568 local_irq_save(flags);
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -0500569 set_fixmap(FIX_TEXT_POKE0, page_to_phys(pages[0]));
570 if (pages[1])
571 set_fixmap(FIX_TEXT_POKE1, page_to_phys(pages[1]));
572 vaddr = (char *)fix_to_virt(FIX_TEXT_POKE0);
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400573 memcpy(&vaddr[(unsigned long)addr & ~PAGE_MASK], opcode, len);
Masami Hiramatsu78ff7fa2009-03-06 10:37:54 -0500574 clear_fixmap(FIX_TEXT_POKE0);
575 if (pages[1])
576 clear_fixmap(FIX_TEXT_POKE1);
577 local_flush_tlb();
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500578 sync_core();
579 /* Could also do a CLFLUSH here to speed up CPU recovery; but
580 that causes hangs on some VIA CPUs. */
Mathieu Desnoyersb7b66ba2008-04-24 11:03:33 -0400581 for (i = 0; i < len; i++)
582 BUG_ON(((char *)addr)[i] != ((char *)opcode)[i]);
Masami Hiramatsu7cf49422009-03-09 12:40:40 -0400583 local_irq_restore(flags);
Mathieu Desnoyerse587cad2008-03-06 08:48:49 -0500584 return addr;
Andi Kleen19d36cc2007-07-22 11:12:31 +0200585}
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500586
587/*
588 * Cross-modifying kernel text with stop_machine().
589 * This code originally comes from immediate value.
590 */
591static atomic_t stop_machine_first;
592static int wrote_text;
593
594struct text_poke_params {
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900595 struct text_poke_param *params;
596 int nparams;
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500597};
598
599static int __kprobes stop_machine_text_poke(void *data)
600{
601 struct text_poke_params *tpp = data;
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900602 struct text_poke_param *p;
603 int i;
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500604
605 if (atomic_dec_and_test(&stop_machine_first)) {
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900606 for (i = 0; i < tpp->nparams; i++) {
607 p = &tpp->params[i];
608 text_poke(p->addr, p->opcode, p->len);
609 }
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500610 smp_wmb(); /* Make sure other cpus see that this has run */
611 wrote_text = 1;
612 } else {
613 while (!wrote_text)
Masami Hiramatsue5a11012010-03-03 22:38:50 -0500614 cpu_relax();
615 smp_mb(); /* Load wrote_text before following execution */
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500616 }
617
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900618 for (i = 0; i < tpp->nparams; i++) {
619 p = &tpp->params[i];
620 flush_icache_range((unsigned long)p->addr,
621 (unsigned long)p->addr + p->len);
622 }
Mathieu Desnoyers0e00f7a2011-03-03 11:01:37 -0500623 /*
624 * Intel Archiecture Software Developer's Manual section 7.1.3 specifies
625 * that a core serializing instruction such as "cpuid" should be
626 * executed on _each_ core before the new instruction is made visible.
627 */
628 sync_core();
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500629 return 0;
630}
631
632/**
633 * text_poke_smp - Update instructions on a live kernel on SMP
634 * @addr: address to modify
635 * @opcode: source of the copy
636 * @len: length to copy
637 *
638 * Modify multi-byte instruction by using stop_machine() on SMP. This allows
639 * user to poke/set multi-byte text on SMP. Only non-NMI/MCE code modifying
640 * should be allowed, since stop_machine() does _not_ protect code against
641 * NMI and MCE.
642 *
643 * Note: Must be called under get_online_cpus() and text_mutex.
644 */
645void *__kprobes text_poke_smp(void *addr, const void *opcode, size_t len)
646{
647 struct text_poke_params tpp;
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900648 struct text_poke_param p;
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500649
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900650 p.addr = addr;
651 p.opcode = opcode;
652 p.len = len;
653 tpp.params = &p;
654 tpp.nparams = 1;
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500655 atomic_set(&stop_machine_first, 1);
656 wrote_text = 0;
Masami Hiramatsu3caa37512010-10-14 12:10:36 +0900657 /* Use __stop_machine() because the caller already got online_cpus. */
Jason Baron404ba5d2010-10-28 11:20:27 -0400658 __stop_machine(stop_machine_text_poke, (void *)&tpp, cpu_online_mask);
Masami Hiramatsu3d55cc82010-02-25 08:34:38 -0500659 return addr;
660}
661
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900662/**
663 * text_poke_smp_batch - Update instructions on a live kernel on SMP
664 * @params: an array of text_poke parameters
665 * @n: the number of elements in params.
666 *
667 * Modify multi-byte instruction by using stop_machine() on SMP. Since the
668 * stop_machine() is heavy task, it is better to aggregate text_poke requests
669 * and do it once if possible.
670 *
671 * Note: Must be called under get_online_cpus() and text_mutex.
672 */
673void __kprobes text_poke_smp_batch(struct text_poke_param *params, int n)
674{
675 struct text_poke_params tpp = {.params = params, .nparams = n};
676
677 atomic_set(&stop_machine_first, 1);
678 wrote_text = 0;
Peter Zijlstrad91309f2011-02-11 22:07:46 +0100679 __stop_machine(stop_machine_text_poke, (void *)&tpp, NULL);
Masami Hiramatsu7deb18d2010-12-03 18:54:22 +0900680}
681
#if defined(CONFIG_DYNAMIC_FTRACE) || defined(HAVE_JUMP_LABEL)

/* Default 5-byte NOP; may be replaced at boot by arch_init_ideal_nop5(). */
#ifdef CONFIG_X86_64
unsigned char ideal_nop5[5] = { 0x66, 0x66, 0x66, 0x66, 0x90 };
#else
unsigned char ideal_nop5[5] = { 0x3e, 0x8d, 0x74, 0x26, 0x00 };
#endif

/*
 * Pick the 5-byte NOP to use for the boot CPU.
 *
 * There is no good nop for all x86 archs.  This selection algorithm
 * should be unified with the one in find_nop_table(), but this should
 * be good enough for now.
 *
 * For cases other than the ones below, the safe (as in always
 * functional) defaults above stay in place.
 */
void __init arch_init_ideal_nop5(void)
{
#ifdef CONFIG_X86_64
	/* Don't use these on 32 bits due to broken virtualizers */
	if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL)
		memcpy(ideal_nop5, p6_nops[5], sizeof(ideal_nop5));
#endif
}
#endif