blob: 36fd42091fa7283d71e189ddb55cf29a5e940790 [file] [log] [blame]
/*
 * User-space Probes (UProbes) for x86
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
 *
 * Copyright (C) IBM Corporation, 2008-2011
 * Authors:
 *	Srikar Dronamraju
 *	Jim Keniston
 */
Srikar Dronamraju2b144492012-02-09 14:56:42 +053023#include <linux/kernel.h>
24#include <linux/sched.h>
25#include <linux/ptrace.h>
26#include <linux/uprobes.h>
Srikar Dronamraju0326f5a2012-03-13 23:30:11 +053027#include <linux/uaccess.h>
Srikar Dronamraju2b144492012-02-09 14:56:42 +053028
29#include <linux/kdebug.h>
Srikar Dronamraju0326f5a2012-03-13 23:30:11 +053030#include <asm/processor.h>
Srikar Dronamraju2b144492012-02-09 14:56:42 +053031#include <asm/insn.h>
32
/* Post-execution fixups. */

/* No fixup needed */
#define UPROBE_FIX_NONE 0x0

/* Adjust IP back to vicinity of actual insn */
#define UPROBE_FIX_IP 0x1

/* Adjust the return address of a call insn */
#define UPROBE_FIX_CALL 0x2

/*
 * The insn was rip-relative and has been rewritten to address memory
 * through a scratch register instead: %rax for UPROBE_FIX_RIP_AX, %rcx for
 * UPROBE_FIX_RIP_CX. The scratch register is saved before the out-of-line
 * step and restored after it.
 */
#define UPROBE_FIX_RIP_AX 0x8000
#define UPROBE_FIX_RIP_CX 0x4000

/*
 * Marker written to thread.trap_nr by arch_uprobe_pre_xol(). Any other
 * value found afterwards is taken to mean the out-of-line insn trapped.
 */
#define UPROBE_TRAP_NR UINT_MAX
48
/*
 * Adaptations for mhiramat x86 decoder v14.
 *
 * Each macro takes a pointer to a decoded struct insn. OPCODEn() yields the
 * n-th opcode byte; MODRM_REG() yields the reg field of the modrm byte.
 * The argument is parenthesized in every macro so that pointer expressions
 * (e.g. "p + 1") expand correctly.
 */
#define OPCODE1(insn) ((insn)->opcode.bytes[0])
#define OPCODE2(insn) ((insn)->opcode.bytes[1])
#define OPCODE3(insn) ((insn)->opcode.bytes[2])
#define MODRM_REG(insn) X86_MODRM_REG((insn)->modrm.value)
Srikar Dronamraju2b144492012-02-09 14:56:42 +053054
/*
 * Build one 16-opcode row of an opcode bitmap.
 *
 * @row is the opcode of the first entry in the row (0x00, 0x10, ...) and
 * b0..bf are literal 0/1 flags for opcodes row+0x0 .. row+0xf (they are
 * token-pasted with UL, so they must be plain integer literals). The 16
 * flag bits are shifted by (row % 32), i.e. into the low or the high half
 * of a 32-bit word, so two consecutive rows can be OR-ed into one element.
 * @row is parenthesized so that an expression argument shifts correctly.
 */
#define W(row, b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, ba, bb, bc, bd, be, bf)\
	(((b0##UL << 0x0)|(b1##UL << 0x1)|(b2##UL << 0x2)|(b3##UL << 0x3) | \
	  (b4##UL << 0x4)|(b5##UL << 0x5)|(b6##UL << 0x6)|(b7##UL << 0x7) | \
	  (b8##UL << 0x8)|(b9##UL << 0x9)|(ba##UL << 0xa)|(bb##UL << 0xb) | \
	  (bc##UL << 0xc)|(bd##UL << 0xd)|(be##UL << 0xe)|(bf##UL << 0xf)) \
	 << ((row) % 32))
61
/*
 * Good-instruction tables for 32-bit apps. This is non-const and volatile
 * to keep gcc from statically optimizing it out, as variable_test_bit makes
 * some versions of gcc think only *(unsigned long*) is used.
 *
 * One bit per one-byte opcode. validate_insn_32bits() accepts an insn whose
 * first opcode byte has its bit set here. Each W() row covers 16 opcodes;
 * two rows are OR-ed together to form each u32 element.
 */
static volatile u32 good_insns_32[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 0, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
89
/*
 * Using this for both 64-bit and 32-bit apps.
 *
 * One bit per second opcode byte. It is consulted by the validate_insn_*
 * helpers only when the decoded opcode is two bytes long, after the
 * one-byte table has rejected the first byte.
 */
static volatile u32 good_2byte_insns[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* 10 */
	W(0x20, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1) | /* 20 */
	W(0x30, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) , /* 30 */
	W(0x40, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1) , /* 70 */
	W(0x80, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* c0 */
	W(0xd0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* e0 */
	W(0xf0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0) /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
113
#ifdef CONFIG_X86_64
/*
 * Good-instruction tables for 64-bit apps.
 *
 * Same layout as good_insns_32: one bit per one-byte opcode, tested by
 * validate_insn_64bits() against the first opcode byte.
 */
static volatile u32 good_insns_64[256 / 32] = {
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
	/*      ----------------------------------------------         */
	W(0x00, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 00 */
	W(0x10, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 10 */
	W(0x20, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) | /* 20 */
	W(0x30, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0) , /* 30 */
	W(0x40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) | /* 40 */
	W(0x50, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 50 */
	W(0x60, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* 60 */
	W(0x70, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 70 */
	W(0x80, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* 80 */
	W(0x90, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* 90 */
	W(0xa0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) | /* a0 */
	W(0xb0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* b0 */
	W(0xc0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0) | /* c0 */
	W(0xd0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1) , /* d0 */
	W(0xe0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0) | /* e0 */
	W(0xf0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1) /* f0 */
	/*      ----------------------------------------------         */
	/*      0  1  2  3  4  5  6  7  8  9  a  b  c  d  e  f         */
};
#endif
/* W() is only meant for building the tables above. */
#undef W
140
/*
 * opcodes we'll probably never support:
 *
 *  6c-6d, e4-e5, ec-ed - in
 *  6e-6f, e6-e7, ee-ef - out
 *  cc, cd - int3, int
 *  cf - iret
 *  d6 - illegal instruction
 *  f1 - int1/icebp
 *  f4 - hlt
 *  fa, fb - cli, sti
 *  0f - lar, lsl, syscall, clts, sysret, sysenter, sysexit, invd, wbinvd, ud2
 *
 * invalid opcodes in 64-bit mode:
 *
 *  06, 0e, 16, 1e, 27, 2f, 37, 3f, 60-62, 82, c4-c5, d4-d5
 *  63 - we support this opcode in x86_64 but not in i386.
 *
 * opcodes we may need to refine support for:
 *
 *  0f - 2-byte instructions: For many of these instructions, the validity
 *  depends on the prefix and/or the reg field. On such instructions, we
 *  just consider the opcode combination valid if it corresponds to any
 *  valid instruction.
 *
 *  8f - Group 1 - only reg = 0 is OK
 *  c6-c7 - Group 11 - only reg = 0 is OK
 *  d9-df - fpu insns with some illegal encodings
 *  f2, f3 - repnz, repz prefixes. These are also the first byte for
 *  certain floating-point instructions, such as addsd.
 *
 *  fe - Group 4 - only reg = 0 or 1 is OK
 *  ff - Group 5 - only reg = 0-6 is OK
 *
 * others -- Do we need to support these?
 *
 *  0f - (floating-point?) prefetch instructions
 *  07, 17, 1f - pop es, pop ss, pop ds
 *  26, 2e, 36, 3e - es:, cs:, ss:, ds: segment prefixes --
 *	but 64 and 65 (fs: and gs:) seem to be used, so we support them
 *  67 - addr16 prefix
 *  ce - into
 *  f0 - lock prefix
 */
185
/*
 * TODO:
 * - Where necessary, examine the modrm byte and allow only valid instructions
 *   in the different Groups and fpu instructions.
 */
191
192static bool is_prefix_bad(struct insn *insn)
193{
194 int i;
195
196 for (i = 0; i < insn->prefixes.nbytes; i++) {
197 switch (insn->prefixes.bytes[i]) {
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100198 case 0x26: /* INAT_PFX_ES */
199 case 0x2E: /* INAT_PFX_CS */
200 case 0x36: /* INAT_PFX_DS */
201 case 0x3E: /* INAT_PFX_SS */
202 case 0xF0: /* INAT_PFX_LOCK */
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530203 return true;
204 }
205 }
206 return false;
207}
208
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530209static int validate_insn_32bits(struct arch_uprobe *auprobe, struct insn *insn)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530210{
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530211 insn_init(insn, auprobe->insn, false);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530212
213 /* Skip good instruction prefixes; reject "bad" ones. */
214 insn_get_opcode(insn);
215 if (is_prefix_bad(insn))
216 return -ENOTSUPP;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100217
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530218 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_32))
219 return 0;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100220
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530221 if (insn->opcode.nbytes == 2) {
222 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
223 return 0;
224 }
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100225
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530226 return -ENOTSUPP;
227}
228
229/*
Srikar Dronamraju0326f5a2012-03-13 23:30:11 +0530230 * Figure out which fixups arch_uprobe_post_xol() will need to perform, and
231 * annotate arch_uprobe->fixups accordingly. To start with,
232 * arch_uprobe->fixups is either zero or it reflects rip-related fixups.
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530233 */
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530234static void prepare_fixups(struct arch_uprobe *auprobe, struct insn *insn)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530235{
236 bool fix_ip = true, fix_call = false; /* defaults */
237 int reg;
238
239 insn_get_opcode(insn); /* should be a nop */
240
241 switch (OPCODE1(insn)) {
242 case 0xc3: /* ret/lret */
243 case 0xcb:
244 case 0xc2:
245 case 0xca:
246 /* ip is correct */
247 fix_ip = false;
248 break;
249 case 0xe8: /* call relative - Fix return addr */
250 fix_call = true;
251 break;
252 case 0x9a: /* call absolute - Fix return addr, not ip */
253 fix_call = true;
254 fix_ip = false;
255 break;
256 case 0xff:
257 insn_get_modrm(insn);
258 reg = MODRM_REG(insn);
259 if (reg == 2 || reg == 3) {
260 /* call or lcall, indirect */
261 /* Fix return addr; ip is correct. */
262 fix_call = true;
263 fix_ip = false;
264 } else if (reg == 4 || reg == 5) {
265 /* jmp or ljmp, indirect */
266 /* ip is correct. */
267 fix_ip = false;
268 }
269 break;
270 case 0xea: /* jmp absolute -- ip is correct */
271 fix_ip = false;
272 break;
273 default:
274 break;
275 }
276 if (fix_ip)
Srikar Dronamraju900771a2012-03-12 14:55:14 +0530277 auprobe->fixups |= UPROBE_FIX_IP;
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530278 if (fix_call)
Srikar Dronamraju900771a2012-03-12 14:55:14 +0530279 auprobe->fixups |= UPROBE_FIX_CALL;
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530280}
281
282#ifdef CONFIG_X86_64
283/*
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530284 * If arch_uprobe->insn doesn't use rip-relative addressing, return
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530285 * immediately. Otherwise, rewrite the instruction so that it accesses
286 * its memory operand indirectly through a scratch register. Set
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530287 * arch_uprobe->fixups and arch_uprobe->rip_rela_target_address
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530288 * accordingly. (The contents of the scratch register will be saved
289 * before we single-step the modified instruction, and restored
290 * afterward.)
291 *
292 * We do this because a rip-relative instruction can access only a
293 * relatively small area (+/- 2 GB from the instruction), and the XOL
294 * area typically lies beyond that area. At least for instructions
295 * that store to memory, we can't execute the original instruction
296 * and "fix things up" later, because the misdirected store could be
297 * disastrous.
298 *
299 * Some useful facts about rip-relative instructions:
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100300 *
301 * - There's always a modrm byte.
302 * - There's never a SIB byte.
303 * - The displacement is always 4 bytes.
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530304 */
Srikar Dronamrajue3343e62012-03-12 14:55:30 +0530305static void
306handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530307{
308 u8 *cursor;
309 u8 reg;
310
311 if (mm->context.ia32_compat)
312 return;
313
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530314 auprobe->rip_rela_target_address = 0x0;
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530315 if (!insn_rip_relative(insn))
316 return;
317
318 /*
319 * insn_rip_relative() would have decoded rex_prefix, modrm.
320 * Clear REX.b bit (extension of MODRM.rm field):
321 * we want to encode rax/rcx, not r8/r9.
322 */
323 if (insn->rex_prefix.nbytes) {
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530324 cursor = auprobe->insn + insn_offset_rex_prefix(insn);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530325 *cursor &= 0xfe; /* Clearing REX.B bit */
326 }
327
328 /*
329 * Point cursor at the modrm byte. The next 4 bytes are the
330 * displacement. Beyond the displacement, for some instructions,
331 * is the immediate operand.
332 */
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530333 cursor = auprobe->insn + insn_offset_modrm(insn);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530334 insn_get_length(insn);
335
336 /*
337 * Convert from rip-relative addressing to indirect addressing
338 * via a scratch register. Change the r/m field from 0x5 (%rip)
339 * to 0x0 (%rax) or 0x1 (%rcx), and squeeze out the offset field.
340 */
341 reg = MODRM_REG(insn);
342 if (reg == 0) {
343 /*
344 * The register operand (if any) is either the A register
345 * (%rax, %eax, etc.) or (if the 0x4 bit is set in the
346 * REX prefix) %r8. In any case, we know the C register
347 * is NOT the register operand, so we use %rcx (register
348 * #1) for the scratch register.
349 */
Srikar Dronamraju900771a2012-03-12 14:55:14 +0530350 auprobe->fixups = UPROBE_FIX_RIP_CX;
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530351 /* Change modrm from 00 000 101 to 00 000 001. */
352 *cursor = 0x1;
353 } else {
354 /* Use %rax (register #0) for the scratch register. */
Srikar Dronamraju900771a2012-03-12 14:55:14 +0530355 auprobe->fixups = UPROBE_FIX_RIP_AX;
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530356 /* Change modrm from 00 xxx 101 to 00 xxx 000 */
357 *cursor = (reg << 3);
358 }
359
360 /* Target address = address of next instruction + (signed) offset */
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530361 auprobe->rip_rela_target_address = (long)insn->length + insn->displacement.value;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100362
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530363 /* Displacement field is gone; slide immediate field (if any) over. */
364 if (insn->immediate.nbytes) {
365 cursor++;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100366 memmove(cursor, cursor + insn->displacement.nbytes, insn->immediate.nbytes);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530367 }
368 return;
369}
370
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530371static int validate_insn_64bits(struct arch_uprobe *auprobe, struct insn *insn)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530372{
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530373 insn_init(insn, auprobe->insn, true);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530374
375 /* Skip good instruction prefixes; reject "bad" ones. */
376 insn_get_opcode(insn);
377 if (is_prefix_bad(insn))
378 return -ENOTSUPP;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100379
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530380 if (test_bit(OPCODE1(insn), (unsigned long *)good_insns_64))
381 return 0;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100382
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530383 if (insn->opcode.nbytes == 2) {
384 if (test_bit(OPCODE2(insn), (unsigned long *)good_2byte_insns))
385 return 0;
386 }
387 return -ENOTSUPP;
388}
389
Srikar Dronamrajue3343e62012-03-12 14:55:30 +0530390static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530391{
392 if (mm->context.ia32_compat)
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530393 return validate_insn_32bits(auprobe, insn);
394 return validate_insn_64bits(auprobe, insn);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530395}
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100396#else /* 32-bit: */
/* 32-bit build: there is no RIP-relative addressing, so nothing to rewrite. */
static void
handle_riprel_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
}
401
/* 32-bit build: every insn is validated with the 32-bit rules; @mm is unused. */
static int
validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm, struct insn *insn)
{
	return validate_insn_32bits(auprobe, insn);
}
406#endif /* CONFIG_X86_64 */
407
408/**
Srikar Dronamraju0326f5a2012-03-13 23:30:11 +0530409 * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530410 * @mm: the probed address space.
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530411 * @arch_uprobe: the probepoint information.
Ananth N Mavinakayanahalli7eb9ba52012-06-08 15:02:57 +0530412 * @addr: virtual address at which to install the probepoint
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530413 * Return 0 on success or a -ve number on error.
414 */
Ananth N Mavinakayanahalli7eb9ba52012-06-08 15:02:57 +0530415int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530416{
417 int ret;
418 struct insn insn;
419
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530420 auprobe->fixups = 0;
Srikar Dronamrajue3343e62012-03-12 14:55:30 +0530421 ret = validate_insn_bits(auprobe, mm, &insn);
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530422 if (ret != 0)
423 return ret;
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100424
Srikar Dronamrajue3343e62012-03-12 14:55:30 +0530425 handle_riprel_insn(auprobe, mm, &insn);
Srikar Dronamraju3ff54ef2012-02-22 14:46:02 +0530426 prepare_fixups(auprobe, &insn);
Ingo Molnar7b2d81d2012-02-17 09:27:41 +0100427
Srikar Dronamraju2b144492012-02-09 14:56:42 +0530428 return 0;
429}
Srikar Dronamraju0326f5a2012-03-13 23:30:11 +0530430
#ifdef CONFIG_X86_64
/*
 * For a rewritten rip-relative insn, stash the scratch register chosen at
 * analysis time in @autask and load it with the address the original insn
 * would have referenced (probed vaddr + rip_rela_target_address).
 * Does nothing when neither rip fixup bit is set.
 */
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
{
	if (auprobe->fixups & UPROBE_FIX_RIP_AX) {
		autask->saved_scratch_register = regs->ax;
		regs->ax = current->utask->vaddr + auprobe->rip_rela_target_address;
		return;
	}

	if (auprobe->fixups & UPROBE_FIX_RIP_CX) {
		autask->saved_scratch_register = regs->cx;
		regs->cx = current->utask->vaddr + auprobe->rip_rela_target_address;
	}
}
#else
/* 32-bit build: no RIP-relative addressing, so no scratch register setup. */
static void
pre_xol_rip_insn(struct arch_uprobe *auprobe, struct pt_regs *regs,
				struct arch_uprobe_task *autask)
{
}
#endif
458
459/*
460 * arch_uprobe_pre_xol - prepare to execute out of line.
461 * @auprobe: the probepoint information.
462 * @regs: reflects the saved user state of current task.
463 */
464int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
465{
466 struct arch_uprobe_task *autask;
467
468 autask = &current->utask->autask;
469 autask->saved_trap_nr = current->thread.trap_nr;
470 current->thread.trap_nr = UPROBE_TRAP_NR;
471 regs->ip = current->utask->xol_vaddr;
472 pre_xol_rip_insn(auprobe, regs, autask);
473
474 return 0;
475}
476
477/*
478 * This function is called by arch_uprobe_post_xol() to adjust the return
479 * address pushed by a call instruction executed out of line.
480 */
481static int adjust_ret_addr(unsigned long sp, long correction)
482{
483 int rasize, ncopied;
484 long ra = 0;
485
486 if (is_ia32_task())
487 rasize = 4;
488 else
489 rasize = 8;
490
491 ncopied = copy_from_user(&ra, (void __user *)sp, rasize);
492 if (unlikely(ncopied))
493 return -EFAULT;
494
495 ra += correction;
496 ncopied = copy_to_user((void __user *)sp, &ra, rasize);
497 if (unlikely(ncopied))
498 return -EFAULT;
499
500 return 0;
501}
502
#ifdef CONFIG_X86_64
/* True when the insn was rewritten to go through a scratch register. */
static bool is_riprel_insn(struct arch_uprobe *auprobe)
{
	return (auprobe->fixups & (UPROBE_FIX_RIP_AX | UPROBE_FIX_RIP_CX)) != 0;
}

/*
 * Undo the scratch-register setup of a rewritten rip-relative insn.
 *
 * Restores %rax or %rcx from the per-task save slot. If @correction is
 * non-NULL it is bumped by 4: the original insn carried a 4-byte
 * displacement and is therefore 4 bytes longer than the one that was
 * single-stepped. This also covers insns such as "jmpq *...(%rip)" and
 * "callq *...(%rip)". Does nothing for insns that were not rewritten.
 */
static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
{
	struct arch_uprobe_task *autask;

	if (!is_riprel_insn(auprobe))
		return;

	autask = &current->utask->autask;
	if (auprobe->fixups & UPROBE_FIX_RIP_AX)
		regs->ax = autask->saved_scratch_register;
	else
		regs->cx = autask->saved_scratch_register;

	if (correction)
		*correction += 4;
}
#else
/* 32-bit build: no RIP-relative addressing, so nothing to restore. */
static void
handle_riprel_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs, long *correction)
{
}
#endif
538
539/*
540 * If xol insn itself traps and generates a signal(Say,
541 * SIGILL/SIGSEGV/etc), then detect the case where a singlestepped
542 * instruction jumps back to its own address. It is assumed that anything
543 * like do_page_fault/do_trap/etc sets thread.trap_nr != -1.
544 *
545 * arch_uprobe_pre_xol/arch_uprobe_post_xol save/restore thread.trap_nr,
546 * arch_uprobe_xol_was_trapped() simply checks that ->trap_nr is not equal to
547 * UPROBE_TRAP_NR == -1 set by arch_uprobe_pre_xol().
548 */
549bool arch_uprobe_xol_was_trapped(struct task_struct *t)
550{
551 if (t->thread.trap_nr != UPROBE_TRAP_NR)
552 return true;
553
554 return false;
555}
556
557/*
558 * Called after single-stepping. To avoid the SMP problems that can
559 * occur when we temporarily put back the original opcode to
560 * single-step, we single-stepped a copy of the instruction.
561 *
562 * This function prepares to resume execution after the single-step.
563 * We have to fix things up as follows:
564 *
565 * Typically, the new ip is relative to the copied instruction. We need
566 * to make it relative to the original instruction (FIX_IP). Exceptions
567 * are return instructions and absolute or indirect jump or call instructions.
568 *
569 * If the single-stepped instruction was a call, the return address that
570 * is atop the stack is the address following the copied instruction. We
571 * need to make it the address following the original instruction (FIX_CALL).
572 *
573 * If the original instruction was a rip-relative instruction such as
574 * "movl %edx,0xnnnn(%rip)", we have instead executed an equivalent
575 * instruction using a scratch register -- e.g., "movl %edx,(%rax)".
576 * We need to restore the contents of the scratch register and adjust
577 * the ip, keeping in mind that the instruction we executed is 4 bytes
578 * shorter than the original instruction (since we squeezed out the offset
579 * field). (FIX_RIP_AX or FIX_RIP_CX)
580 */
581int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
582{
583 struct uprobe_task *utask;
584 long correction;
585 int result = 0;
586
587 WARN_ON_ONCE(current->thread.trap_nr != UPROBE_TRAP_NR);
588
589 utask = current->utask;
590 current->thread.trap_nr = utask->autask.saved_trap_nr;
591 correction = (long)(utask->vaddr - utask->xol_vaddr);
592 handle_riprel_post_xol(auprobe, regs, &correction);
593 if (auprobe->fixups & UPROBE_FIX_IP)
594 regs->ip += correction;
595
596 if (auprobe->fixups & UPROBE_FIX_CALL)
597 result = adjust_ret_addr(regs->sp, correction);
598
599 return result;
600}
601
602/* callback routine for handling exceptions. */
603int arch_uprobe_exception_notify(struct notifier_block *self, unsigned long val, void *data)
604{
605 struct die_args *args = data;
606 struct pt_regs *regs = args->regs;
607 int ret = NOTIFY_DONE;
608
609 /* We are only interested in userspace traps */
610 if (regs && !user_mode_vm(regs))
611 return NOTIFY_DONE;
612
613 switch (val) {
614 case DIE_INT3:
615 if (uprobe_pre_sstep_notifier(regs))
616 ret = NOTIFY_STOP;
617
618 break;
619
620 case DIE_DEBUG:
621 if (uprobe_post_sstep_notifier(regs))
622 ret = NOTIFY_STOP;
623
624 default:
625 break;
626 }
627
628 return ret;
629}
630
631/*
632 * This function gets called when XOL instruction either gets trapped or
633 * the thread has a fatal signal, so reset the instruction pointer to its
634 * probed address.
635 */
636void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
637{
638 struct uprobe_task *utask = current->utask;
639
640 current->thread.trap_nr = utask->autask.saved_trap_nr;
641 handle_riprel_post_xol(auprobe, regs, NULL);
642 instruction_pointer_set(regs, utask->vaddr);
643}
644
645/*
646 * Skip these instructions as per the currently known x86 ISA.
647 * 0x66* { 0x90 | 0x0f 0x1f | 0x0f 0x19 | 0x87 0xc0 }
648 */
649bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
650{
651 int i;
652
653 for (i = 0; i < MAX_UINSN_BYTES; i++) {
654 if ((auprobe->insn[i] == 0x66))
655 continue;
656
657 if (auprobe->insn[i] == 0x90)
658 return true;
659
660 if (i == (MAX_UINSN_BYTES - 1))
661 break;
662
663 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x1f))
664 return true;
665
666 if ((auprobe->insn[i] == 0x0f) && (auprobe->insn[i+1] == 0x19))
667 return true;
668
669 if ((auprobe->insn[i] == 0x87) && (auprobe->insn[i+1] == 0xc0))
670 return true;
671
672 break;
673 }
674 return false;
675}