Blame - arch/x86_64/kernel/kprobes.c - android_kernel_lge_hammerhead

blob: f77f8a0ff1873bdc1717d473f74eb4116874eae7 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Kernel Probes (KProbes)
				3	* arch/x86_64/kernel/kprobes.c
				4	*
				5	* This program is free software; you can redistribute it and/or modify
				6	* it under the terms of the GNU General Public License as published by
				7	* the Free Software Foundation; either version 2 of the License, or
				8	* (at your option) any later version.
				9	*
				10	* This program is distributed in the hope that it will be useful,
				11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
				12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
				13	* GNU General Public License for more details.
				14	*
				15	* You should have received a copy of the GNU General Public License
				16	* along with this program; if not, write to the Free Software
				17	* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
				18	*
				19	* Copyright (C) IBM Corporation, 2002, 2004
				20	*
				21	* 2002-Oct Created by Vamsi Krishna S <vamsi_krishna@in.ibm.com> Kernel
				22	* Probes initial implementation ( includes contributions from
				23	* Rusty Russell).
				24	* 2004-July Suparna Bhattacharya <suparna@in.ibm.com> added jumper probes
				25	* interface to access function arguments.
				26	* 2004-Oct Jim Keniston <kenistoj@us.ibm.com> and Prasanna S Panchamukhi
				27	* <prasanna@in.ibm.com> adapted for x86_64
				28	* 2005-Mar Roland McGrath <roland@redhat.com>
				29	* Fixed to handle %rip-relative addressing mode correctly.
				30	*/
				31
				32	#include <linux/config.h>
				33	#include <linux/kprobes.h>
				34	#include <linux/ptrace.h>
				35	#include <linux/spinlock.h>
				36	#include <linux/string.h>
				37	#include <linux/slab.h>
				38	#include <linux/preempt.h>
				39	#include <linux/moduleloader.h>
				40
				41	#include <asm/pgtable.h>
				42	#include <asm/kdebug.h>
				43
				44	static DECLARE_MUTEX(kprobe_mutex);
				45
				46	/* kprobe_status settings */
				47	#define KPROBE_HIT_ACTIVE 0x00000001
				48	#define KPROBE_HIT_SS 0x00000002
				49
				50	static struct kprobe *current_kprobe;
				51	static unsigned long kprobe_status, kprobe_old_rflags, kprobe_saved_rflags;
				52	static struct pt_regs jprobe_saved_regs;
				53	static long *jprobe_saved_rsp;
				54	static kprobe_opcode_t *get_insn_slot(void);
				55	static void free_insn_slot(kprobe_opcode_t *slot);
				56	void jprobe_return_end(void);
				57
				58	/* copy of the kernel stack at the probe fire time */
				59	static kprobe_opcode_t jprobes_stack[MAX_STACK_SIZE];
				60
				61	/*
				62	* returns non-zero if opcode modifies the interrupt flag.
				63	*/
				64	static inline int is_IF_modifier(kprobe_opcode_t *insn)
				65	{
				66	switch (*insn) {
				67	case 0xfa: /* cli */
				68	case 0xfb: /* sti */
				69	case 0xcf: /* iret/iretd */
				70	case 0x9d: /* popf/popfd */
				71	return 1;
				72	}
				73
				74	if (insn >= 0x40 && insn <= 0x4f && *++insn == 0xcf)
				75	return 1;
				76	return 0;
				77	}
				78
				79	int arch_prepare_kprobe(struct kprobe *p)
				80	{
				81	/* insn: must be on special executable page on x86_64. */
				82	up(&kprobe_mutex);
				83	p->ainsn.insn = get_insn_slot();
				84	down(&kprobe_mutex);
				85	if (!p->ainsn.insn) {
				86	return -ENOMEM;
				87	}
				88	return 0;
				89	}
				90
				91	/*
				92	* Determine if the instruction uses the %rip-relative addressing mode.
				93	* If it does, return the address of the 32-bit displacement word.
				94	* If not, return null.
				95	*/
				96	static inline s32 is_riprel(u8 insn)
				97	{
				98	#define W(row,b0,b1,b2,b3,b4,b5,b6,b7,b8,b9,ba,bb,bc,bd,be,bf) \
				99	(((b0##UL << 0x0)\|(b1##UL << 0x1)\|(b2##UL << 0x2)\|(b3##UL << 0x3) \| \
				100	(b4##UL << 0x4)\|(b5##UL << 0x5)\|(b6##UL << 0x6)\|(b7##UL << 0x7) \| \
				101	(b8##UL << 0x8)\|(b9##UL << 0x9)\|(ba##UL << 0xa)\|(bb##UL << 0xb) \| \
				102	(bc##UL << 0xc)\|(bd##UL << 0xd)\|(be##UL << 0xe)\|(bf##UL << 0xf)) \
				103	<< (row % 64))
				104	static const u64 onebyte_has_modrm[256 / 64] = {
				105	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
				106	/* ------------------------------- */
				107	W(0x00, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)\| /* 00 */
				108	W(0x10, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)\| /* 10 */
				109	W(0x20, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0)\| /* 20 */
				110	W(0x30, 1,1,1,1,0,0,0,0,1,1,1,1,0,0,0,0), /* 30 */
				111	W(0x40, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* 40 */
				112	W(0x50, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* 50 */
				113	W(0x60, 0,0,1,1,0,0,0,0,0,1,0,1,0,0,0,0)\| /* 60 */
				114	W(0x70, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 70 */
				115	W(0x80, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* 80 */
				116	W(0x90, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* 90 */
				117	W(0xa0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* a0 */
				118	W(0xb0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* b0 */
				119	W(0xc0, 1,1,0,0,1,1,1,1,0,0,0,0,0,0,0,0)\| /* c0 */
				120	W(0xd0, 1,1,1,1,0,0,0,0,1,1,1,1,1,1,1,1)\| /* d0 */
				121	W(0xe0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* e0 */
				122	W(0xf0, 0,0,0,0,0,0,1,1,0,0,0,0,0,0,1,1) /* f0 */
				123	/* ------------------------------- */
				124	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
				125	};
				126	static const u64 twobyte_has_modrm[256 / 64] = {
				127	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
				128	/* ------------------------------- */
				129	W(0x00, 1,1,1,1,0,0,0,0,0,0,0,0,0,1,0,1)\| /* 0f */
				130	W(0x10, 1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,0)\| /* 1f */
				131	W(0x20, 1,1,1,1,1,0,1,0,1,1,1,1,1,1,1,1)\| /* 2f */
				132	W(0x30, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0), /* 3f */
				133	W(0x40, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* 4f */
				134	W(0x50, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* 5f */
				135	W(0x60, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* 6f */
				136	W(0x70, 1,1,1,1,1,1,1,0,0,0,0,0,1,1,1,1), /* 7f */
				137	W(0x80, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)\| /* 8f */
				138	W(0x90, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* 9f */
				139	W(0xa0, 0,0,0,1,1,1,1,1,0,0,0,1,1,1,1,1)\| /* af */
				140	W(0xb0, 1,1,1,1,1,1,1,1,0,0,1,1,1,1,1,1), /* bf */
				141	W(0xc0, 1,1,1,1,1,1,1,1,0,0,0,0,0,0,0,0)\| /* cf */
				142	W(0xd0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* df */
				143	W(0xe0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1)\| /* ef */
				144	W(0xf0, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0) /* ff */
				145	/* ------------------------------- */
				146	/* 0 1 2 3 4 5 6 7 8 9 a b c d e f */
				147	};
				148	#undef W
				149	int need_modrm;
				150
				151	/* Skip legacy instruction prefixes. */
				152	while (1) {
				153	switch (*insn) {
				154	case 0x66:
				155	case 0x67:
				156	case 0x2e:
				157	case 0x3e:
				158	case 0x26:
				159	case 0x64:
				160	case 0x65:
				161	case 0x36:
				162	case 0xf0:
				163	case 0xf3:
				164	case 0xf2:
				165	++insn;
				166	continue;
				167	}
				168	break;
				169	}
				170
				171	/* Skip REX instruction prefix. */
				172	if ((*insn & 0xf0) == 0x40)
				173	++insn;
				174
				175	if (insn == 0x0f) { / Two-byte opcode. */
				176	++insn;
				177	need_modrm = test_bit(*insn, twobyte_has_modrm);
				178	} else { /* One-byte opcode. */
				179	need_modrm = test_bit(*insn, onebyte_has_modrm);
				180	}
				181
				182	if (need_modrm) {
				183	u8 modrm = *++insn;
				184	if ((modrm & 0xc7) == 0x05) { /* %rip+disp32 addressing mode */
				185	/* Displacement follows ModRM byte. */
				186	return (s32 *) ++insn;
				187	}
				188	}
				189
				190	/* No %rip-relative addressing mode here. */
				191	return NULL;
				192	}
				193
				194	void arch_copy_kprobe(struct kprobe *p)
				195	{
				196	s32 *ripdisp;
				197	memcpy(p->ainsn.insn, p->addr, MAX_INSN_SIZE);
				198	ripdisp = is_riprel(p->ainsn.insn);
				199	if (ripdisp) {
				200	/*
				201	* The copied instruction uses the %rip-relative
				202	* addressing mode. Adjust the displacement for the
				203	* difference between the original location of this
				204	* instruction and the location of the copy that will
				205	* actually be run. The tricky bit here is making sure
				206	* that the sign extension happens correctly in this
				207	* calculation, since we need a signed 32-bit result to
				208	* be sign-extended to 64 bits when it's added to the
				209	* %rip value and yield the same 64-bit result that the
				210	* sign-extension of the original signed 32-bit
				211	* displacement would have given.
				212	*/
				213	s64 disp = (u8 ) p->addr + ripdisp - (u8 *) p->ainsn.insn;
				214	BUG_ON((s64) (s32) disp != disp); /* Sanity check. */
				215	*ripdisp = disp;
				216	}
				217	}
				218
				219	void arch_remove_kprobe(struct kprobe *p)
				220	{
				221	up(&kprobe_mutex);
				222	free_insn_slot(p->ainsn.insn);
				223	down(&kprobe_mutex);
				224	}
				225
				226	static inline void disarm_kprobe(struct kprobe p, struct pt_regs regs)
				227	{
				228	*p->addr = p->opcode;
				229	regs->rip = (unsigned long)p->addr;
				230	}
				231
				232	static void prepare_singlestep(struct kprobe p, struct pt_regs regs)
				233	{
				234	regs->eflags \|= TF_MASK;
				235	regs->eflags &= ~IF_MASK;
				236	/single step inline if the instruction is an int3/
				237	if (p->opcode == BREAKPOINT_INSTRUCTION)
				238	regs->rip = (unsigned long)p->addr;
				239	else
				240	regs->rip = (unsigned long)p->ainsn.insn;
				241	}
				242
				243	/*
				244	* Interrupts are disabled on entry as trap3 is an interrupt gate and they
				245	* remain disabled thorough out this function.
				246	*/
				247	int kprobe_handler(struct pt_regs *regs)
				248	{
				249	struct kprobe *p;
				250	int ret = 0;
				251	kprobe_opcode_t addr = (kprobe_opcode_t )(regs->rip - sizeof(kprobe_opcode_t));
				252
				253	/* We're in an interrupt, but this is clear and BUG()-safe. */
				254	preempt_disable();
				255
				256	/* Check we're not actually recursing */
				257	if (kprobe_running()) {
				258	/* We are holding lock here, so this is safe.
				259	Disarm the probe we just hit, and ignore it. */
				260	p = get_kprobe(addr);
				261	if (p) {
				262	if (kprobe_status == KPROBE_HIT_SS) {
				263	regs->eflags &= ~TF_MASK;
				264	regs->eflags \|= kprobe_saved_rflags;
				265	unlock_kprobes();
				266	goto no_kprobe;
				267	}
				268	disarm_kprobe(p, regs);
				269	ret = 1;
				270	} else {
				271	p = current_kprobe;
				272	if (p->break_handler && p->break_handler(p, regs)) {
				273	goto ss_probe;
				274	}
				275	}
				276	/* If it's not ours, can't be delete race, (we hold lock). */
				277	goto no_kprobe;
				278	}
				279
				280	lock_kprobes();
				281	p = get_kprobe(addr);
				282	if (!p) {
				283	unlock_kprobes();
				284	if (*addr != BREAKPOINT_INSTRUCTION) {
				285	/*
				286	* The breakpoint instruction was removed right
				287	* after we hit it. Another cpu has removed
				288	* either a probepoint or a debugger breakpoint
				289	* at this address. In either case, no further
				290	* handling of this interrupt is appropriate.
				291	*/
				292	ret = 1;
				293	}
				294	/* Not one of ours: let kernel handle it */
				295	goto no_kprobe;
				296	}
				297
				298	kprobe_status = KPROBE_HIT_ACTIVE;
				299	current_kprobe = p;
				300	kprobe_saved_rflags = kprobe_old_rflags
				301	= (regs->eflags & (TF_MASK \| IF_MASK));
				302	if (is_IF_modifier(p->ainsn.insn))
				303	kprobe_saved_rflags &= ~IF_MASK;
				304
				305	if (p->pre_handler && p->pre_handler(p, regs))
				306	/* handler has already set things up, so skip ss setup */
				307	return 1;
				308
				309	ss_probe:
				310	prepare_singlestep(p, regs);
				311	kprobe_status = KPROBE_HIT_SS;
				312	return 1;
				313
				314	no_kprobe:
				315	preempt_enable_no_resched();
				316	return ret;
				317	}
				318
				319	/*
				320	* Called after single-stepping. p->addr is the address of the
				321	* instruction whose first byte has been replaced by the "int 3"
				322	* instruction. To avoid the SMP problems that can occur when we
				323	* temporarily put back the original opcode to single-step, we
				324	* single-stepped a copy of the instruction. The address of this
				325	* copy is p->ainsn.insn.
				326	*
				327	* This function prepares to return from the post-single-step
				328	* interrupt. We have to fix up the stack as follows:
				329	*
				330	* 0) Except in the case of absolute or indirect jump or call instructions,
				331	* the new rip is relative to the copied instruction. We need to make
				332	* it relative to the original instruction.
				333	*
				334	* 1) If the single-stepped instruction was pushfl, then the TF and IF
				335	* flags are set in the just-pushed eflags, and may need to be cleared.
				336	*
				337	* 2) If the single-stepped instruction was a call, the return address
				338	* that is atop the stack is the address following the copied instruction.
				339	* We need to make it the address following the original instruction.
				340	*/
				341	static void resume_execution(struct kprobe p, struct pt_regs regs)
				342	{
				343	unsigned long tos = (unsigned long )regs->rsp;
				344	unsigned long next_rip = 0;
				345	unsigned long copy_rip = (unsigned long)p->ainsn.insn;
				346	unsigned long orig_rip = (unsigned long)p->addr;
				347	kprobe_opcode_t *insn = p->ainsn.insn;
				348
				349	/skip the REX prefix/
				350	if (insn >= 0x40 && insn <= 0x4f)
				351	insn++;
				352
				353	switch (*insn) {
				354	case 0x9c: /* pushfl */
				355	*tos &= ~(TF_MASK \| IF_MASK);
				356	*tos \|= kprobe_old_rflags;
				357	break;
Prasanna S Panchamukhi	0b9e2ca	2005-05-05 16:15:40 -0700	[diff] [blame^]	358	case 0xc3: /* ret/lret */
				359	case 0xcb:
				360	case 0xc2:
				361	case 0xca:
				362	regs->eflags &= ~TF_MASK;
				363	/* rip is already adjusted, no more changes required*/
				364	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	365	case 0xe8: /* call relative - Fix return addr */
				366	tos = orig_rip + (tos - copy_rip);
				367	break;
				368	case 0xff:
				369	if ((*insn & 0x30) == 0x10) {
				370	/* call absolute, indirect */
				371	/* Fix return addr; rip is correct. */
				372	next_rip = regs->rip;
				373	tos = orig_rip + (tos - copy_rip);
				374	} else if (((insn & 0x31) == 0x20) \|\| / jmp near, absolute indirect */
				375	((insn & 0x31) == 0x21)) { / jmp far, absolute indirect */
				376	/* rip is correct. */
				377	next_rip = regs->rip;
				378	}
				379	break;
				380	case 0xea: /* jmp absolute -- rip is correct */
				381	next_rip = regs->rip;
				382	break;
				383	default:
				384	break;
				385	}
				386
				387	regs->eflags &= ~TF_MASK;
				388	if (next_rip) {
				389	regs->rip = next_rip;
				390	} else {
				391	regs->rip = orig_rip + (regs->rip - copy_rip);
				392	}
				393	}
				394
				395	/*
				396	* Interrupts are disabled on entry as trap1 is an interrupt gate and they
				397	* remain disabled thoroughout this function. And we hold kprobe lock.
				398	*/
				399	int post_kprobe_handler(struct pt_regs *regs)
				400	{
				401	if (!kprobe_running())
				402	return 0;
				403
				404	if (current_kprobe->post_handler)
				405	current_kprobe->post_handler(current_kprobe, regs, 0);
				406
				407	resume_execution(current_kprobe, regs);
				408	regs->eflags \|= kprobe_saved_rflags;
				409
				410	unlock_kprobes();
				411	preempt_enable_no_resched();
				412
				413	/*
				414	* if somebody else is singlestepping across a probe point, eflags
				415	* will have TF set, in which case, continue the remaining processing
				416	* of do_debug, as if this is not a probe hit.
				417	*/
				418	if (regs->eflags & TF_MASK)
				419	return 0;
				420
				421	return 1;
				422	}
				423
				424	/* Interrupts disabled, kprobe_lock held. */
				425	int kprobe_fault_handler(struct pt_regs *regs, int trapnr)
				426	{
				427	if (current_kprobe->fault_handler
				428	&& current_kprobe->fault_handler(current_kprobe, regs, trapnr))
				429	return 1;
				430
				431	if (kprobe_status & KPROBE_HIT_SS) {
				432	resume_execution(current_kprobe, regs);
				433	regs->eflags \|= kprobe_old_rflags;
				434
				435	unlock_kprobes();
				436	preempt_enable_no_resched();
				437	}
				438	return 0;
				439	}
				440
				441	/*
				442	* Wrapper routine for handling exceptions.
				443	*/
				444	int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val,
				445	void *data)
				446	{
				447	struct die_args args = (struct die_args )data;
				448	switch (val) {
				449	case DIE_INT3:
				450	if (kprobe_handler(args->regs))
				451	return NOTIFY_STOP;
				452	break;
				453	case DIE_DEBUG:
				454	if (post_kprobe_handler(args->regs))
				455	return NOTIFY_STOP;
				456	break;
				457	case DIE_GPF:
				458	if (kprobe_running() &&
				459	kprobe_fault_handler(args->regs, args->trapnr))
				460	return NOTIFY_STOP;
				461	break;
				462	case DIE_PAGE_FAULT:
				463	if (kprobe_running() &&
				464	kprobe_fault_handler(args->regs, args->trapnr))
				465	return NOTIFY_STOP;
				466	break;
				467	default:
				468	break;
				469	}
				470	return NOTIFY_DONE;
				471	}
				472
				473	int setjmp_pre_handler(struct kprobe p, struct pt_regs regs)
				474	{
				475	struct jprobe *jp = container_of(p, struct jprobe, kp);
				476	unsigned long addr;
				477
				478	jprobe_saved_regs = *regs;
				479	jprobe_saved_rsp = (long *) regs->rsp;
				480	addr = (unsigned long)jprobe_saved_rsp;
				481	/*
				482	* As Linus pointed out, gcc assumes that the callee
				483	* owns the argument space and could overwrite it, e.g.
				484	* tailcall optimization. So, to be absolutely safe
				485	* we also save and restore enough stack bytes to cover
				486	* the argument area.
				487	*/
				488	memcpy(jprobes_stack, (kprobe_opcode_t *) addr, MIN_STACK_SIZE(addr));
				489	regs->eflags &= ~IF_MASK;
				490	regs->rip = (unsigned long)(jp->entry);
				491	return 1;
				492	}
				493
				494	void jprobe_return(void)
				495	{
				496	preempt_enable_no_resched();
				497	asm volatile (" xchg %%rbx,%%rsp \n"
				498	" int3 \n"
				499	" .globl jprobe_return_end \n"
				500	" jprobe_return_end: \n"
				501	" nop \n"::"b"
				502	(jprobe_saved_rsp):"memory");
				503	}
				504
				505	int longjmp_break_handler(struct kprobe p, struct pt_regs regs)
				506	{
				507	u8 addr = (u8 ) (regs->rip - 1);
				508	unsigned long stack_addr = (unsigned long)jprobe_saved_rsp;
				509	struct jprobe *jp = container_of(p, struct jprobe, kp);
				510
				511	if ((addr > (u8 ) jprobe_return) && (addr < (u8 ) jprobe_return_end)) {
				512	if ((long *)regs->rsp != jprobe_saved_rsp) {
				513	struct pt_regs *saved_regs =
				514	container_of(jprobe_saved_rsp, struct pt_regs, rsp);
				515	printk("current rsp %p does not match saved rsp %p\n",
				516	(long *)regs->rsp, jprobe_saved_rsp);
				517	printk("Saved registers for jprobe %p\n", jp);
				518	show_registers(saved_regs);
				519	printk("Current registers\n");
				520	show_registers(regs);
				521	BUG();
				522	}
				523	*regs = jprobe_saved_regs;
				524	memcpy((kprobe_opcode_t *) stack_addr, jprobes_stack,
				525	MIN_STACK_SIZE(stack_addr));
				526	return 1;
				527	}
				528	return 0;
				529	}
				530
				531	/*
				532	* kprobe->ainsn.insn points to the copy of the instruction to be single-stepped.
				533	* By default on x86_64, pages we get from kmalloc or vmalloc are not
				534	* executable. Single-stepping an instruction on such a page yields an
				535	* oops. So instead of storing the instruction copies in their respective
				536	* kprobe objects, we allocate a page, map it executable, and store all the
				537	* instruction copies there. (We can allocate additional pages if somebody
				538	* inserts a huge number of probes.) Each page can hold up to INSNS_PER_PAGE
				539	* instruction slots, each of which is MAX_INSN_SIZE*sizeof(kprobe_opcode_t)
				540	* bytes.
				541	*/
				542	#define INSNS_PER_PAGE (PAGE_SIZE/(MAX_INSN_SIZE*sizeof(kprobe_opcode_t)))
				543	struct kprobe_insn_page {
				544	struct hlist_node hlist;
				545	kprobe_opcode_t insns; / page of instruction slots */
				546	char slot_used[INSNS_PER_PAGE];
				547	int nused;
				548	};
				549
				550	static struct hlist_head kprobe_insn_pages;
				551
				552	/**
				553	* get_insn_slot() - Find a slot on an executable page for an instruction.
				554	* We allocate an executable page if there's no room on existing ones.
				555	*/
				556	static kprobe_opcode_t *get_insn_slot(void)
				557	{
				558	struct kprobe_insn_page *kip;
				559	struct hlist_node *pos;
				560
				561	hlist_for_each(pos, &kprobe_insn_pages) {
				562	kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
				563	if (kip->nused < INSNS_PER_PAGE) {
				564	int i;
				565	for (i = 0; i < INSNS_PER_PAGE; i++) {
				566	if (!kip->slot_used[i]) {
				567	kip->slot_used[i] = 1;
				568	kip->nused++;
				569	return kip->insns + (i*MAX_INSN_SIZE);
				570	}
				571	}
				572	/* Surprise! No unused slots. Fix kip->nused. */
				573	kip->nused = INSNS_PER_PAGE;
				574	}
				575	}
				576
				577	/* All out of space. Need to allocate a new page. Use slot 0.*/
				578	kip = kmalloc(sizeof(struct kprobe_insn_page), GFP_KERNEL);
				579	if (!kip) {
				580	return NULL;
				581	}
				582
				583	/*
				584	* For the %rip-relative displacement fixups to be doable, we
				585	* need our instruction copy to be within +/- 2GB of any data it
				586	* might access via %rip. That is, within 2GB of where the
				587	* kernel image and loaded module images reside. So we allocate
				588	* a page in the module loading area.
				589	*/
				590	kip->insns = module_alloc(PAGE_SIZE);
				591	if (!kip->insns) {
				592	kfree(kip);
				593	return NULL;
				594	}
				595	INIT_HLIST_NODE(&kip->hlist);
				596	hlist_add_head(&kip->hlist, &kprobe_insn_pages);
				597	memset(kip->slot_used, 0, INSNS_PER_PAGE);
				598	kip->slot_used[0] = 1;
				599	kip->nused = 1;
				600	return kip->insns;
				601	}
				602
				603	/**
				604	* free_insn_slot() - Free instruction slot obtained from get_insn_slot().
				605	*/
				606	static void free_insn_slot(kprobe_opcode_t *slot)
				607	{
				608	struct kprobe_insn_page *kip;
				609	struct hlist_node *pos;
				610
				611	hlist_for_each(pos, &kprobe_insn_pages) {
				612	kip = hlist_entry(pos, struct kprobe_insn_page, hlist);
				613	if (kip->insns <= slot
				614	&& slot < kip->insns+(INSNS_PER_PAGE*MAX_INSN_SIZE)) {
				615	int i = (slot - kip->insns) / MAX_INSN_SIZE;
				616	kip->slot_used[i] = 0;
				617	kip->nused--;
				618	if (kip->nused == 0) {
				619	/*
				620	* Page is no longer in use. Free it unless
				621	* it's the last one. We keep the last one
				622	* so as not to have to set it up again the
				623	* next time somebody inserts a probe.
				624	*/
				625	hlist_del(&kip->hlist);
				626	if (hlist_empty(&kprobe_insn_pages)) {
				627	INIT_HLIST_NODE(&kip->hlist);
				628	hlist_add_head(&kip->hlist,
				629	&kprobe_insn_pages);
				630	} else {
				631	module_free(NULL, kip->insns);
				632	kfree(kip);
				633	}
				634	}
				635	return;
				636	}
				637	}
				638	}