Blame - arch/ia64/kernel/unaligned.c - android_kernel_htc_msm8960

blob: ff0e7c10faa7222d48964d355bf3322b45506800 [file] [log] [blame]

Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1	/*
				2	* Architecture-specific unaligned trap handling.
				3	*
				4	* Copyright (C) 1999-2002, 2004 Hewlett-Packard Co
				5	* Stephane Eranian <eranian@hpl.hp.com>
				6	* David Mosberger-Tang <davidm@hpl.hp.com>
				7	*
				8	* 2002/12/09 Fix rotating register handling (off-by-1 error, missing fr-rotation). Fix
				9	* get_rse_reg() to not leak kernel bits to user-level (reading an out-of-frame
				10	* stacked register returns an undefined value; it does NOT trigger a
				11	* "rsvd register fault").
				12	* 2001/10/11 Fix unaligned access to rotating registers in s/w pipelined loops.
				13	* 2001/08/13 Correct size of extended floats (float_fsz) from 16 to 10 bytes.
				14	* 2001/01/17 Add support emulation of unaligned kernel accesses.
				15	*/
S.Caglar Onur	5cf1f7c	2008-03-28 14:27:05 -0700	[diff] [blame]	16	#include <linux/jiffies.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	17	#include <linux/kernel.h>
				18	#include <linux/sched.h>
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	19	#include <linux/tty.h>
				20
				21	#include <asm/intrinsics.h>
				22	#include <asm/processor.h>
				23	#include <asm/rse.h>
				24	#include <asm/uaccess.h>
				25	#include <asm/unaligned.h>
				26
/* Defined elsewhere in the kernel; the return value is tested by callers in this file. */
extern int die_if_kernel(char *str, struct pt_regs *regs, long err);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	28
				/*
				 * Debug support.  DEBUG_UNALIGNED_TRAP is explicitly undefined here, so
				 * DPRINT() and DDUMP() expand to nothing unless the #undef is changed.
				 */
				#undef DEBUG_UNALIGNED_TRAP

				#ifdef DEBUG_UNALIGNED_TRAP
				# define DPRINT(a...)	do { printk("%s %u: ", __func__, __LINE__); printk (a); } while (0)
				# define DDUMP(str,vp,len)	dump(str, vp, len)

				/* Print STR followed by the LEN bytes at VP as two-digit hex values, then a newline. */
				static void
				dump (const char *str, void *vp, size_t len)
				{
					unsigned char *cp = vp;
					size_t i;	/* same type as len, avoids a signed/unsigned comparison */

					printk("%s", str);
					for (i = 0; i < len; ++i)
						printk (" %02x", *cp++);
					printk("\n");
				}
				#else
				# define DPRINT(a...)
				# define DDUMP(str,vp,len)
				#endif
				50
				/* First register number handled as a stacked GR / rotating FR in this file. */
				#define IA64_FIRST_STACKED_GR	32
				#define IA64_FIRST_ROTATING_FR	32
				/* OR-ed into an 8-bit immediate to sign-extend it when the sign bit (m) is set. */
				#define SIGN_EXT9		0xffffffffffffff00ul
				54
				/*
				 * sysctl settable hook which tells the kernel whether to honor the
				 * IA64_THREAD_UAC_NOPRINT prctl.  Because this is user settable, we want
				 * to allow the super user to enable/disable this for security reasons
				 * (i.e. don't allow attacker to fill up logs with unaligned accesses).
				 */
				int no_unaligned_warning;
				static int noprint_warning;
				63
				/*
				 * For M-unit:
				 *
				 *  opcode |   m  |   x6    |
				 * --------|------|---------|
				 * [40-37] | [36] | [35:30] |
				 * --------|------|---------|
				 *     4   |   1  |    6    | = 11 bits
				 * --------------------------
				 * However bits [31:30] are not directly useful to distinguish between
				 * load/store so we can use [35:32] instead, which gives the following
				 * mask ([40:32]) using 9 bits. The 'e' comes from the fact that we defer
				 * checking the m-bit until later in the load/store emulation.
				 */
				#define IA64_OPCODE_MASK	0x1ef
				#define IA64_OPCODE_SHIFT	32
				80
				/*
				 * Table C-28 Integer Load/Store
				 *
				 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				 *
				 * ld8.fill, st8.fill MUST be aligned because the RNATs are based on
				 * the address (bits [8:3]), so we must fail.
				 */
				#define LD_OP            0x080
				#define LDS_OP           0x081
				#define LDA_OP           0x082
				#define LDSA_OP          0x083
				#define LDBIAS_OP        0x084
				#define LDACQ_OP         0x085
				/* 0x086, 0x087 are not relevant */
				#define LDCCLR_OP        0x088
				#define LDCNC_OP         0x089
				#define LDCCLRACQ_OP     0x08a
				#define ST_OP            0x08c
				#define STREL_OP         0x08d
				/* 0x08e,0x8f are not relevant */
				101	/* 0x08e,0x8f are not relevant */
				102
				/*
				 * Table C-29 Integer Load +Reg
				 *
				 * we use the ld->m (bit [36:36]) field to determine whether or not we have
				 * a load/store of this form.
				 */
				109
				/*
				 * Table C-30 Integer Load/Store +Imm
				 *
				 * We ignore [35:32]= 0x6, 0x7, 0xE, 0xF
				 *
				 * ld8.fill, st8.fill must be aligned because the NaT registers are based on
				 * the address, so we must fail and the program must be fixed.
				 */
				#define LD_IMM_OP            0x0a0
				#define LDS_IMM_OP           0x0a1
				#define LDA_IMM_OP           0x0a2
				#define LDSA_IMM_OP          0x0a3
				#define LDBIAS_IMM_OP        0x0a4
				#define LDACQ_IMM_OP         0x0a5
				/* 0x0a6, 0xa7 are not relevant */
				#define LDCCLR_IMM_OP        0x0a8
				#define LDCNC_IMM_OP         0x0a9
				#define LDCCLRACQ_IMM_OP     0x0aa
				#define ST_IMM_OP            0x0ac
				#define STREL_IMM_OP         0x0ad
				/* 0x0ae,0xaf are not relevant */
				130	/* 0x0ae,0xaf are not relevant */
				131
				/*
				 * Table C-32 Floating-point Load/Store
				 */
				#define LDF_OP           0x0c0
				#define LDFS_OP          0x0c1
				#define LDFA_OP          0x0c2
				#define LDFSA_OP         0x0c3
				/* 0x0c6 is irrelevant */
				#define LDFCCLR_OP       0x0c8
				#define LDFCNC_OP        0x0c9
				/* 0x0cb is irrelevant  */
				#define STF_OP           0x0cc
				144
				/*
				 * Table C-33 Floating-point Load +Reg
				 *
				 * we use the ld->m (bit [36:36]) field to determine whether or not we have
				 * a load/store of this form.
				 */
				151
				/*
				 * Table C-34 Floating-point Load/Store +Imm
				 */
				#define LDF_IMM_OP       0x0e0
				#define LDFS_IMM_OP      0x0e1
				#define LDFA_IMM_OP      0x0e2
				#define LDFSA_IMM_OP     0x0e3
				/* 0x0e6 is irrelevant */
				#define LDFCCLR_IMM_OP   0x0e8
				#define LDFCNC_IMM_OP    0x0e9
				#define STF_IMM_OP       0x0ec
				163
				/*
				 * Bit-field view of a load/store instruction word.  The bit positions of
				 * each field are given in the per-field comments.
				 */
				typedef struct {
					unsigned long	 qp:6;	/* [0:5]   */
					unsigned long    r1:7;	/* [6:12]  */
					unsigned long   imm:7;	/* [13:19] */
					unsigned long    r3:7;	/* [20:26] */
					unsigned long     x:1;  /* [27:27] */
					unsigned long  hint:2;	/* [28:29] */
					unsigned long x6_sz:2;	/* [30:31] */
					unsigned long x6_op:4;	/* [32:35], x6 = x6_sz|x6_op */
					unsigned long     m:1;	/* [36:36] */
					unsigned long    op:4;	/* [37:40] */
					unsigned long   pad:23; /* [41:63] */
				} load_store_t;
				177
				178
				/* Form of base-register update that accompanies a load. */
				typedef enum {
					UPD_IMMEDIATE,	/* ldXZ r1=[r3],imm(9) */
					UPD_REG		/* ldXZ r1=[r3],r2     */
				} update_t;
				183
				/*
				 * We use tables to keep track of the offsets of registers in the saved state.
				 * This way we save having big switch/case statements.
				 *
				 * We use bit 0 to indicate switch_stack or pt_regs.
				 * The offset is simply shifted by 1 bit.
				 * A 2-byte value should be enough to hold any kind of offset
				 *
				 * In case the calling convention changes (and thus pt_regs/switch_stack)
				 * simply use RSW instead of RPT or vice-versa.
				 */

				/* Byte offset of member x within struct pt_regs / struct switch_stack. */
				#define RPO(x)	((size_t) &((struct pt_regs *)0)->x)
				#define RSO(x)	((size_t) &((struct switch_stack *)0)->x)

				/* Table encodings: offset shifted left by one, bit 0 set for switch_stack. */
				#define RPT(x)		(RPO(x) << 1)
				#define RSW(x)		(1| RSO(x)<<1)

				/* Decode a gr_info[] entry: offset and "lives in switch_stack" flag. */
				#define GR_OFFS(x)	(gr_info[x]>>1)
				#define GR_IN_SW(x)	(gr_info[x] & 0x1)

				/* Decode an fr_info[] entry: offset and "lives in switch_stack" flag. */
				#define FR_OFFS(x)	(fr_info[x]>>1)
				#define FR_IN_SW(x)	(fr_info[x] & 0x1)
				207
				208	static u16 gr_info[32]={
				209	0, /* r0 is read-only : WE SHOULD NEVER GET THIS */
				210
				211	RPT(r1), RPT(r2), RPT(r3),
				212
				213	RSW(r4), RSW(r5), RSW(r6), RSW(r7),
				214
				215	RPT(r8), RPT(r9), RPT(r10), RPT(r11),
				216	RPT(r12), RPT(r13), RPT(r14), RPT(r15),
				217
				218	RPT(r16), RPT(r17), RPT(r18), RPT(r19),
				219	RPT(r20), RPT(r21), RPT(r22), RPT(r23),
				220	RPT(r24), RPT(r25), RPT(r26), RPT(r27),
				221	RPT(r28), RPT(r29), RPT(r30), RPT(r31)
				222	};
				223
				224	static u16 fr_info[32]={
				225	0, /* constant : WE SHOULD NEVER GET THIS */
				226	0, /* constant : WE SHOULD NEVER GET THIS */
				227
				228	RSW(f2), RSW(f3), RSW(f4), RSW(f5),
				229
				230	RPT(f6), RPT(f7), RPT(f8), RPT(f9),
				231	RPT(f10), RPT(f11),
				232
				233	RSW(f12), RSW(f13), RSW(f14),
				234	RSW(f15), RSW(f16), RSW(f17), RSW(f18), RSW(f19),
				235	RSW(f20), RSW(f21), RSW(f22), RSW(f23), RSW(f24),
				236	RSW(f25), RSW(f26), RSW(f27), RSW(f28), RSW(f29),
				237	RSW(f30), RSW(f31)
				238	};
				239
				/*
				 * Invalidate ALAT entry for integer register REGNO.
				 *
				 * Each case passes a literal register number to ia64_invala_gr(); values
				 * of REGNO outside 0-127 match no case and do nothing.
				 */
				static void
				invala_gr (int regno)
				{
				#	define F(reg)	case reg: ia64_invala_gr(reg); break

					switch (regno) {
						F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
						F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
						F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
						F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
						F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
						F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
						F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
						F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
						F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
						F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
						F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
						F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
						F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
						F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
						F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
						F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
					}
				#	undef F
				}
				266
				/*
				 * Invalidate ALAT entry for floating-point register REGNO.
				 *
				 * Each case passes a literal register number to ia64_invala_fr(); values
				 * of REGNO outside 0-127 match no case and do nothing.
				 */
				static void
				invala_fr (int regno)
				{
				#	define F(reg)	case reg: ia64_invala_fr(reg); break

					switch (regno) {
						F(  0); F(  1); F(  2); F(  3); F(  4); F(  5); F(  6); F(  7);
						F(  8); F(  9); F( 10); F( 11); F( 12); F( 13); F( 14); F( 15);
						F( 16); F( 17); F( 18); F( 19); F( 20); F( 21); F( 22); F( 23);
						F( 24); F( 25); F( 26); F( 27); F( 28); F( 29); F( 30); F( 31);
						F( 32); F( 33); F( 34); F( 35); F( 36); F( 37); F( 38); F( 39);
						F( 40); F( 41); F( 42); F( 43); F( 44); F( 45); F( 46); F( 47);
						F( 48); F( 49); F( 50); F( 51); F( 52); F( 53); F( 54); F( 55);
						F( 56); F( 57); F( 58); F( 59); F( 60); F( 61); F( 62); F( 63);
						F( 64); F( 65); F( 66); F( 67); F( 68); F( 69); F( 70); F( 71);
						F( 72); F( 73); F( 74); F( 75); F( 76); F( 77); F( 78); F( 79);
						F( 80); F( 81); F( 82); F( 83); F( 84); F( 85); F( 86); F( 87);
						F( 88); F( 89); F( 90); F( 91); F( 92); F( 93); F( 94); F( 95);
						F( 96); F( 97); F( 98); F( 99); F(100); F(101); F(102); F(103);
						F(104); F(105); F(106); F(107); F(108); F(109); F(110); F(111);
						F(112); F(113); F(114); F(115); F(116); F(117); F(118); F(119);
						F(120); F(121); F(122); F(123); F(124); F(125); F(126); F(127);
					}
				#	undef F
				}
				293
				/*
				 * Add the rotating register base RRB to register index REG and wrap the
				 * result back by SOR (size of the rotating region) if it reaches or
				 * exceeds SOR.  Only a single subtraction is performed.
				 */
				static inline unsigned long
				rotate_reg (unsigned long sor, unsigned long rrb, unsigned long reg)
				{
					reg += rrb;
					if (reg >= sor)
						reg -= sor;
					return reg;
				}
				302
				303	static void
				304	set_rse_reg (struct pt_regs *regs, unsigned long r1, unsigned long val, int nat)
				305	{
				306	struct switch_stack sw = (struct switch_stack ) regs - 1;
				307	unsigned long bsp, bspstore, addr, rnat_addr, *ubs_end;
				308	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				309	unsigned long rnats, nat_mask;
				310	unsigned long on_kbs;
				311	long sof = (regs->cr_ifs) & 0x7f;
				312	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				313	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				314	long ridx = r1 - 32;
				315
				316	if (ridx >= sof) {
				317	/* this should never happen, as the "rsvd register fault" has higher priority */
				318	DPRINT("ignoring write to r%lu; only %lu registers are allocated!\n", r1, sof);
				319	return;
				320	}
				321
				322	if (ridx < sor)
				323	ridx = rotate_reg(sor, rrb_gr, ridx);
				324
				325	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				326	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				327
				328	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				329	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				330	if (addr >= kbs) {
				331	/* the register is on the kernel backing store: easy... */
				332	rnat_addr = ia64_rse_rnat_addr(addr);
				333	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				334	rnat_addr = &sw->ar_rnat;
				335	nat_mask = 1UL << ia64_rse_slot_num(addr);
				336
				337	*addr = val;
				338	if (nat)
				339	*rnat_addr \|= nat_mask;
				340	else
				341	*rnat_addr &= ~nat_mask;
				342	return;
				343	}
				344
				345	if (!user_stack(current, regs)) {
				346	DPRINT("ignoring kernel write to r%lu; register isn't on the kernel RBS!", r1);
				347	return;
				348	}
				349
				350	bspstore = (unsigned long *)regs->ar_bspstore;
				351	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				352	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				353	addr = ia64_rse_skip_regs(bsp, ridx);
				354
				355	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				356
				357	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				358
				359	rnat_addr = ia64_rse_rnat_addr(addr);
				360
				361	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				362	DPRINT("rnat @%p = 0x%lx nat=%d old nat=%ld\n",
				363	(void *) rnat_addr, rnats, nat, (rnats >> ia64_rse_slot_num(addr)) & 1);
				364
				365	nat_mask = 1UL << ia64_rse_slot_num(addr);
				366	if (nat)
				367	rnats \|= nat_mask;
				368	else
				369	rnats &= ~nat_mask;
				370	ia64_poke(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, rnats);
				371
				372	DPRINT("rnat changed to @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				373	}
				374
				375
				376	static void
				377	get_rse_reg (struct pt_regs regs, unsigned long r1, unsigned long val, int *nat)
				378	{
				379	struct switch_stack sw = (struct switch_stack ) regs - 1;
				380	unsigned long bsp, addr, rnat_addr, ubs_end, *bspstore;
				381	unsigned long kbs = (void ) current + IA64_RBS_OFFSET;
				382	unsigned long rnats, nat_mask;
				383	unsigned long on_kbs;
				384	long sof = (regs->cr_ifs) & 0x7f;
				385	long sor = 8 * ((regs->cr_ifs >> 14) & 0xf);
				386	long rrb_gr = (regs->cr_ifs >> 18) & 0x7f;
				387	long ridx = r1 - 32;
				388
				389	if (ridx >= sof) {
				390	/* read of out-of-frame register returns an undefined value; 0 in our case. */
				391	DPRINT("ignoring read from r%lu; only %lu registers are allocated!\n", r1, sof);
				392	goto fail;
				393	}
				394
				395	if (ridx < sor)
				396	ridx = rotate_reg(sor, rrb_gr, ridx);
				397
				398	DPRINT("r%lu, sw.bspstore=%lx pt.bspstore=%lx sof=%ld sol=%ld ridx=%ld\n",
				399	r1, sw->ar_bspstore, regs->ar_bspstore, sof, (regs->cr_ifs >> 7) & 0x7f, ridx);
				400
				401	on_kbs = ia64_rse_num_regs(kbs, (unsigned long *) sw->ar_bspstore);
				402	addr = ia64_rse_skip_regs((unsigned long *) sw->ar_bspstore, -sof + ridx);
				403	if (addr >= kbs) {
				404	/* the register is on the kernel backing store: easy... */
				405	val = addr;
				406	if (nat) {
				407	rnat_addr = ia64_rse_rnat_addr(addr);
				408	if ((unsigned long) rnat_addr >= sw->ar_bspstore)
				409	rnat_addr = &sw->ar_rnat;
				410	nat_mask = 1UL << ia64_rse_slot_num(addr);
				411	nat = (rnat_addr & nat_mask) != 0;
				412	}
				413	return;
				414	}
				415
				416	if (!user_stack(current, regs)) {
				417	DPRINT("ignoring kernel read of r%lu; register isn't on the RBS!", r1);
				418	goto fail;
				419	}
				420
				421	bspstore = (unsigned long *)regs->ar_bspstore;
				422	ubs_end = ia64_rse_skip_regs(bspstore, on_kbs);
				423	bsp = ia64_rse_skip_regs(ubs_end, -sof);
				424	addr = ia64_rse_skip_regs(bsp, ridx);
				425
				426	DPRINT("ubs_end=%p bsp=%p addr=%p\n", (void ) ubs_end, (void ) bsp, (void *) addr);
				427
				428	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) addr, val);
				429
				430	if (nat) {
				431	rnat_addr = ia64_rse_rnat_addr(addr);
				432	nat_mask = 1UL << ia64_rse_slot_num(addr);
				433
				434	DPRINT("rnat @%p = 0x%lx\n", (void *) rnat_addr, rnats);
				435
				436	ia64_peek(current, sw, (unsigned long) ubs_end, (unsigned long) rnat_addr, &rnats);
				437	*nat = (rnats & nat_mask) != 0;
				438	}
				439	return;
				440
				441	fail:
				442	*val = 0;
				443	if (nat)
				444	*nat = 0;
				445	return;
				446	}
				447
				448
				449	static void
				450	setreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				451	{
				452	struct switch_stack sw = (struct switch_stack ) regs - 1;
				453	unsigned long addr;
				454	unsigned long bitmask;
				455	unsigned long *unat;
				456
				457	/*
				458	* First takes care of stacked registers
				459	*/
				460	if (regnum >= IA64_FIRST_STACKED_GR) {
				461	set_rse_reg(regs, regnum, val, nat);
				462	return;
				463	}
				464
				465	/*
				466	* Using r0 as a target raises a General Exception fault which has higher priority
				467	* than the Unaligned Reference fault.
				468	*/
				469
				470	/*
				471	* Now look at registers in [0-31] range and init correct UNAT
				472	*/
				473	if (GR_IN_SW(regnum)) {
				474	addr = (unsigned long)sw;
				475	unat = &sw->ar_unat;
				476	} else {
				477	addr = (unsigned long)regs;
				478	unat = &sw->caller_unat;
				479	}
				480	DPRINT("tmp_base=%lx switch_stack=%s offset=%d\n",
				481	addr, unat==&sw->ar_unat ? "yes":"no", GR_OFFS(regnum));
				482	/*
				483	* add offset from base of struct
				484	* and do it !
				485	*/
				486	addr += GR_OFFS(regnum);
				487
				488	(unsigned long )addr = val;
				489
				490	/*
				491	* We need to clear the corresponding UNAT bit to fully emulate the load
				492	* UNAT bit_pos = GR[r3]{8:3} form EAS-2.4
				493	*/
				494	bitmask = 1UL << (addr >> 3 & 0x3f);
				495	DPRINT("0x%lx=0x%lx NaT=%d prev_unat @%p=%lx\n", addr, val, nat, (void ) unat, *unat);
				496	if (nat) {
				497	*unat \|= bitmask;
				498	} else {
				499	*unat &= ~bitmask;
				500	}
				501	DPRINT("0x%lx=0x%lx NaT=%d new unat: %p=%lx\n", addr, val, nat, (void ) unat,*unat);
				502	}
				503
				504	/*
				505	* Return the (rotated) index for floating point register REGNUM (REGNUM must be in the
				506	* range from 32-127, result is in the range from 0-95.
				507	*/
				508	static inline unsigned long
				509	fph_index (struct pt_regs *regs, long regnum)
				510	{
				511	unsigned long rrb_fr = (regs->cr_ifs >> 25) & 0x7f;
				512	return rotate_reg(96, rrb_fr, (regnum - IA64_FIRST_ROTATING_FR));
				513	}
				514
				515	static void
				516	setfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				517	{
				518	struct switch_stack sw = (struct switch_stack )regs - 1;
				519	unsigned long addr;
				520
				521	/*
				522	* From EAS-2.5: FPDisableFault has higher priority than Unaligned
				523	* Fault. Thus, when we get here, we know the partition is enabled.
				524	* To update f32-f127, there are three choices:
				525	*
				526	* (1) save f32-f127 to thread.fph and update the values there
				527	* (2) use a gigantic switch statement to directly access the registers
				528	* (3) generate code on the fly to update the desired register
				529	*
				530	* For now, we are using approach (1).
				531	*/
				532	if (regnum >= IA64_FIRST_ROTATING_FR) {
				533	ia64_sync_fph(current);
				534	current->thread.fph[fph_index(regs, regnum)] = *fpval;
				535	} else {
				536	/*
				537	* pt_regs or switch_stack ?
				538	*/
				539	if (FR_IN_SW(regnum)) {
				540	addr = (unsigned long)sw;
				541	} else {
				542	addr = (unsigned long)regs;
				543	}
				544
				545	DPRINT("tmp_base=%lx offset=%d\n", addr, FR_OFFS(regnum));
				546
				547	addr += FR_OFFS(regnum);
				548	(struct ia64_fpreg )addr = *fpval;
				549
				550	/*
				551	* mark the low partition as being used now
				552	*
				553	* It is highly unlikely that this bit is not already set, but
				554	* let's do it for safety.
				555	*/
				556	regs->cr_ipsr \|= IA64_PSR_MFL;
				557	}
				558	}
				559
				/*
				 * Those 2 inline functions generate the spilled versions of the constant floating point
				 * registers which can be used with stfX
				 */

				/* Store the spilled form of register 0 into *FINAL via ia64_stf_spill(). */
				static inline void
				float_spill_f0 (struct ia64_fpreg *final)
				{
					ia64_stf_spill(final, 0);
				}
				569
				/* Store the spilled form of register 1 into *FINAL via ia64_stf_spill(). */
				static inline void
				float_spill_f1 (struct ia64_fpreg *final)
				{
					ia64_stf_spill(final, 1);
				}
				575
				576	static void
				577	getfpreg (unsigned long regnum, struct ia64_fpreg fpval, struct pt_regs regs)
				578	{
				579	struct switch_stack sw = (struct switch_stack ) regs - 1;
				580	unsigned long addr;
				581
				582	/*
				583	* From EAS-2.5: FPDisableFault has higher priority than
				584	* Unaligned Fault. Thus, when we get here, we know the partition is
				585	* enabled.
				586	*
				587	* When regnum > 31, the register is still live and we need to force a save
				588	* to current->thread.fph to get access to it. See discussion in setfpreg()
				589	* for reasons and other ways of doing this.
				590	*/
				591	if (regnum >= IA64_FIRST_ROTATING_FR) {
				592	ia64_flush_fph(current);
				593	*fpval = current->thread.fph[fph_index(regs, regnum)];
				594	} else {
				595	/*
				596	* f0 = 0.0, f1= 1.0. Those registers are constant and are thus
				597	* not saved, we must generate their spilled form on the fly
				598	*/
				599	switch(regnum) {
				600	case 0:
				601	float_spill_f0(fpval);
				602	break;
				603	case 1:
				604	float_spill_f1(fpval);
				605	break;
				606	default:
				607	/*
				608	* pt_regs or switch_stack ?
				609	*/
				610	addr = FR_IN_SW(regnum) ? (unsigned long)sw
				611	: (unsigned long)regs;
				612
				613	DPRINT("is_sw=%d tmp_base=%lx offset=0x%x\n",
				614	FR_IN_SW(regnum), addr, FR_OFFS(regnum));
				615
				616	addr += FR_OFFS(regnum);
				617	fpval = (struct ia64_fpreg *)addr;
				618	}
				619	}
				620	}
				621
				622
				623	static void
				624	getreg (unsigned long regnum, unsigned long val, int nat, struct pt_regs *regs)
				625	{
				626	struct switch_stack sw = (struct switch_stack ) regs - 1;
				627	unsigned long addr, *unat;
				628
				629	if (regnum >= IA64_FIRST_STACKED_GR) {
				630	get_rse_reg(regs, regnum, val, nat);
				631	return;
				632	}
				633
				634	/*
				635	* take care of r0 (read-only always evaluate to 0)
				636	*/
				637	if (regnum == 0) {
				638	*val = 0;
				639	if (nat)
				640	*nat = 0;
				641	return;
				642	}
				643
				644	/*
				645	* Now look at registers in [0-31] range and init correct UNAT
				646	*/
				647	if (GR_IN_SW(regnum)) {
				648	addr = (unsigned long)sw;
				649	unat = &sw->ar_unat;
				650	} else {
				651	addr = (unsigned long)regs;
				652	unat = &sw->caller_unat;
				653	}
				654
				655	DPRINT("addr_base=%lx offset=0x%x\n", addr, GR_OFFS(regnum));
				656
				657	addr += GR_OFFS(regnum);
				658
				659	val = (unsigned long *)addr;
				660
				661	/*
				662	* do it only when requested
				663	*/
				664	if (nat)
				665	nat = (unat >> (addr >> 3 & 0x3f)) & 0x1UL;
				666	}
				667
				668	static void
				669	emulate_load_updates (update_t type, load_store_t ld, struct pt_regs *regs, unsigned long ifa)
				670	{
				671	/*
				672	* IMPORTANT:
				673	* Given the way we handle unaligned speculative loads, we should
				674	* not get to this point in the code but we keep this sanity check,
				675	* just in case.
				676	*/
				677	if (ld.x6_op == 1 \|\| ld.x6_op == 3) {
Harvey Harrison	d4ed808	2008-03-04 15:15:00 -0800	[diff] [blame]	678	printk(KERN_ERR "%s: register update on speculative load, error\n", __func__);
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	679	if (die_if_kernel("unaligned reference on speculative load with register update\n",
				680	regs, 30))
				681	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	682	}
				683
				684
				685	/*
				686	* at this point, we know that the base register to update is valid i.e.,
				687	* it's not r0
				688	*/
				689	if (type == UPD_IMMEDIATE) {
				690	unsigned long imm;
				691
				692	/*
				693	* Load +Imm: ldXZ r1=[r3],imm(9)
				694	*
				695	*
				696	* form imm9: [13:19] contain the first 7 bits
				697	*/
				698	imm = ld.x << 7 \| ld.imm;
				699
				700	/*
				701	* sign extend (1+8bits) if m set
				702	*/
				703	if (ld.m) imm \|= SIGN_EXT9;
				704
				705	/*
				706	* ifa == r3 and we know that the NaT bit on r3 was clear so
				707	* we can directly use ifa.
				708	*/
				709	ifa += imm;
				710
				711	setreg(ld.r3, ifa, 0, regs);
				712
				713	DPRINT("ld.x=%d ld.m=%d imm=%ld r3=0x%lx\n", ld.x, ld.m, imm, ifa);
				714
				715	} else if (ld.m) {
				716	unsigned long r2;
				717	int nat_r2;
				718
				719	/*
				720	* Load +Reg Opcode: ldXZ r1=[r3],r2
				721	*
				722	* Note: that we update r3 even in the case of ldfX.a
				723	* (where the load does not happen)
				724	*
				725	* The way the load algorithm works, we know that r3 does not
				726	* have its NaT bit set (would have gotten NaT consumption
				727	* before getting the unaligned fault). So we can use ifa
				728	* which equals r3 at this point.
				729	*
				730	* IMPORTANT:
				731	* The above statement holds ONLY because we know that we
				732	* never reach this code when trying to do a ldX.s.
				733	* If we ever make it to here on an ldfX.s then
				734	*/
				735	getreg(ld.imm, &r2, &nat_r2, regs);
				736
				737	ifa += r2;
				738
				739	/*
				740	* propagate Nat r2 -> r3
				741	*/
				742	setreg(ld.r3, ifa, nat_r2, regs);
				743
				744	DPRINT("imm=%d r2=%ld r3=0x%lx nat_r2=%d\n",ld.imm, r2, ifa, nat_r2);
				745	}
				746	}
				747
				748
				749	static int
				750	emulate_load_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				751	{
				752	unsigned int len = 1 << ld.x6_sz;
				753	unsigned long val = 0;
				754
				755	/*
				756	* r0, as target, doesn't need to be checked because Illegal Instruction
				757	* faults have higher priority than unaligned faults.
				758	*
				759	* r0 cannot be found as the base as it would never generate an
				760	* unaligned reference.
				761	*/
				762
				763	/*
				764	* ldX.a we will emulate load and also invalidate the ALAT entry.
				765	* See comment below for explanation on how we handle ldX.a
				766	*/
				767
				768	if (len != 2 && len != 4 && len != 8) {
				769	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				770	return -1;
				771	}
				772	/* this assumes little-endian byte-order: */
				773	if (copy_from_user(&val, (void __user *) ifa, len))
				774	return -1;
				775	setreg(ld.r1, val, 0, regs);
				776
				777	/*
				778	* check for updates on any kind of loads
				779	*/
				780	if (ld.op == 0x5 \|\| ld.m)
				781	emulate_load_updates(ld.op == 0x5 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				782
				783	/*
				784	* handling of various loads (based on EAS2.4):
				785	*
				786	* ldX.acq (ordered load):
				787	* - acquire semantics would have been used, so force fence instead.
				788	*
				789	* ldX.c.clr (check load and clear):
				790	* - if we get to this handler, it's because the entry was not in the ALAT.
				791	* Therefore the operation reverts to a normal load
				792	*
				793	* ldX.c.nc (check load no clear):
				794	* - same as previous one
				795	*
				796	* ldX.c.clr.acq (ordered check load and clear):
				797	* - same as above for c.clr part. The load needs to have acquire semantics. So
				798	* we use the fence semantics which is stronger and thus ensures correctness.
				799	*
				800	* ldX.a (advanced load):
				801	* - suppose ldX.a r1=[r3]. If we get to the unaligned trap it's because the
				802	* address doesn't match requested size alignment. This means that we would
				803	* possibly need more than one load to get the result.
				804	*
				805	* The load part can be handled just like a normal load, however the difficult
				806	* part is to get the right thing into the ALAT. The critical piece of information
				807	* in the base address of the load & size. To do that, a ld.a must be executed,
				808	* clearly any address can be pushed into the table by using ld1.a r1=[r3]. Now
				809	* if we use the same target register, we will be okay for the check.a instruction.
				810	* If we look at the store, basically a stX [r3]=r1 checks the ALAT for any entry
				811	* which would overlap within [r3,r3+X] (the size of the load was store in the
				812	* ALAT). If such an entry is found the entry is invalidated. But this is not good
				813	* enough, take the following example:
				814	* r3=3
				815	* ld4.a r1=[r3]
				816	*
				817	* Could be emulated by doing:
				818	* ld1.a r1=[r3],1
				819	* store to temporary;
				820	* ld1.a r1=[r3],1
				821	* store & shift to temporary;
				822	* ld1.a r1=[r3],1
				823	* store & shift to temporary;
				824	* ld1.a r1=[r3]
				825	* store & shift to temporary;
				826	* r1=temporary
				827	*
				828	* So in this case, you would get the right value is r1 but the wrong info in
				829	* the ALAT. Notice that you could do it in reverse to finish with address 3
				830	* but you would still get the size wrong. To get the size right, one needs to
				831	* execute exactly the same kind of load. You could do it from a aligned
				832	* temporary location, but you would get the address wrong.
				833	*
				834	* So no matter what, it is not possible to emulate an advanced load
				835	* correctly. But is that really critical ?
				836	*
				837	* We will always convert ld.a into a normal load with ALAT invalidated. This
				838	* will enable compiler to do optimization where certain code path after ld.a
				839	* is not required to have ld.c/chk.a, e.g., code path with no intervening stores.
				840	*
				841	* If there is a store after the advanced load, one must either do a ld.c.* or
				842	* chk.a.* to reuse the value stored in the ALAT. Both can "fail" (meaning no
				843	* entry found in ALAT), and that's perfectly ok because:
				844	*
				845	* - ld.c.*, if the entry is not present a normal load is executed
				846	* - chk.a.*, if the entry is not present, execution jumps to recovery code
				847	*
				848	* In either case, the load can be potentially retried in another form.
				849	*
				850	* ALAT must be invalidated for the register (so that chk.a or ld.c don't pick
				851	* up a stale entry later). The register base update MUST also be performed.
				852	*/
				853
				854	/*
				855	* when the load has the .acq completer then
				856	* use ordering fence.
				857	*/
				858	if (ld.x6_op == 0x5 \|\| ld.x6_op == 0xa)
				859	mb();
				860
				861	/*
				862	* invalidate ALAT entry in case of advanced load
				863	*/
				864	if (ld.x6_op == 0x2)
				865	invala_gr(ld.r1);
				866
				867	return 0;
				868	}
				869
				870	static int
				871	emulate_store_int (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				872	{
				873	unsigned long r2;
				874	unsigned int len = 1 << ld.x6_sz;
				875
				876	/*
				877	* if we get to this handler, Nat bits on both r3 and r2 have already
				878	* been checked. so we don't need to do it
				879	*
				880	* extract the value to be stored
				881	*/
				882	getreg(ld.imm, &r2, NULL, regs);
				883
				884	/*
				885	* we rely on the macros in unaligned.h for now i.e.,
				886	* we let the compiler figure out how to read memory gracefully.
				887	*
				888	* We need this switch/case because the way the inline function
				889	* works. The code is optimized by the compiler and looks like
				890	* a single switch/case.
				891	*/
				892	DPRINT("st%d [%lx]=%lx\n", len, ifa, r2);
				893
				894	if (len != 2 && len != 4 && len != 8) {
				895	DPRINT("unknown size: x6=%d\n", ld.x6_sz);
				896	return -1;
				897	}
				898
				899	/* this assumes little-endian byte-order: */
				900	if (copy_to_user((void __user *) ifa, &r2, len))
				901	return -1;
				902
				903	/*
				904	* stX [r3]=r2,imm(9)
				905	*
				906	* NOTE:
				907	* ld.r3 can never be r0, because r0 would not generate an
				908	* unaligned access.
				909	*/
				910	if (ld.op == 0x5) {
				911	unsigned long imm;
				912
				913	/*
				914	* form imm9: [12:6] contain first 7bits
				915	*/
				916	imm = ld.x << 7 \| ld.r1;
				917	/*
				918	* sign extend (8bits) if m set
				919	*/
				920	if (ld.m) imm \|= SIGN_EXT9;
				921	/*
				922	* ifa == r3 (NaT is necessarily cleared)
				923	*/
				924	ifa += imm;
				925
				926	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				927
				928	setreg(ld.r3, ifa, 0, regs);
				929	}
				930	/*
				931	* we don't have alat_invalidate_multiple() so we need
				932	* to do the complete flush :-<<
				933	*/
				934	ia64_invala();
				935
				936	/*
				937	* stX.rel: use fence instead of release
				938	*/
				939	if (ld.x6_op == 0xd)
				940	mb();
				941
				942	return 0;
				943	}
				944
				945	/*
				946	* floating point operations sizes in bytes
				947	*/
				948	static const unsigned char float_fsz[4]={
				949	10, /* extended precision (e) */
				950	8, /* integer (8) */
				951	4, /* single precision (s) */
				952	8 /* double precision (d) */
				953	};
				954
				955	static inline void
				956	mem2float_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				957	{
				958	ia64_ldfe(6, init);
				959	ia64_stop();
				960	ia64_stf_spill(final, 6);
				961	}
				962
				963	static inline void
				964	mem2float_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				965	{
				966	ia64_ldf8(6, init);
				967	ia64_stop();
				968	ia64_stf_spill(final, 6);
				969	}
				970
				971	static inline void
				972	mem2float_single (struct ia64_fpreg init, struct ia64_fpreg final)
				973	{
				974	ia64_ldfs(6, init);
				975	ia64_stop();
				976	ia64_stf_spill(final, 6);
				977	}
				978
				979	static inline void
				980	mem2float_double (struct ia64_fpreg init, struct ia64_fpreg final)
				981	{
				982	ia64_ldfd(6, init);
				983	ia64_stop();
				984	ia64_stf_spill(final, 6);
				985	}
				986
				987	static inline void
				988	float2mem_extended (struct ia64_fpreg init, struct ia64_fpreg final)
				989	{
				990	ia64_ldf_fill(6, init);
				991	ia64_stop();
				992	ia64_stfe(final, 6);
				993	}
				994
				995	static inline void
				996	float2mem_integer (struct ia64_fpreg init, struct ia64_fpreg final)
				997	{
				998	ia64_ldf_fill(6, init);
				999	ia64_stop();
				1000	ia64_stf8(final, 6);
				1001	}
				1002
				1003	static inline void
				1004	float2mem_single (struct ia64_fpreg init, struct ia64_fpreg final)
				1005	{
				1006	ia64_ldf_fill(6, init);
				1007	ia64_stop();
				1008	ia64_stfs(final, 6);
				1009	}
				1010
				1011	static inline void
				1012	float2mem_double (struct ia64_fpreg init, struct ia64_fpreg final)
				1013	{
				1014	ia64_ldf_fill(6, init);
				1015	ia64_stop();
				1016	ia64_stfd(final, 6);
				1017	}
				1018
				1019	static int
				1020	emulate_load_floatpair (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1021	{
				1022	struct ia64_fpreg fpr_init[2];
				1023	struct ia64_fpreg fpr_final[2];
				1024	unsigned long len = float_fsz[ld.x6_sz];
				1025
				1026	/*
				1027	* fr0 & fr1 don't need to be checked because Illegal Instruction faults have
				1028	* higher priority than unaligned faults.
				1029	*
				1030	* r0 cannot be found as the base as it would never generate an unaligned
				1031	* reference.
				1032	*/
				1033
				1034	/*
				1035	* make sure we get clean buffers
				1036	*/
				1037	memset(&fpr_init, 0, sizeof(fpr_init));
				1038	memset(&fpr_final, 0, sizeof(fpr_final));
				1039
				1040	/*
				1041	* ldfpX.a: we don't try to emulate anything but we must
				1042	* invalidate the ALAT entry and execute updates, if any.
				1043	*/
				1044	if (ld.x6_op != 0x2) {
				1045	/*
				1046	* This assumes little-endian byte-order. Note that there is no "ldfpe"
				1047	* instruction:
				1048	*/
				1049	if (copy_from_user(&fpr_init[0], (void __user *) ifa, len)
				1050	\|\| copy_from_user(&fpr_init[1], (void __user *) (ifa + len), len))
				1051	return -1;
				1052
				1053	DPRINT("ld.r1=%d ld.imm=%d x6_sz=%d\n", ld.r1, ld.imm, ld.x6_sz);
				1054	DDUMP("frp_init =", &fpr_init, 2*len);
				1055	/*
				1056	* XXX fixme
				1057	* Could optimize inlines by using ldfpX & 2 spills
				1058	*/
				1059	switch( ld.x6_sz ) {
				1060	case 0:
				1061	mem2float_extended(&fpr_init[0], &fpr_final[0]);
				1062	mem2float_extended(&fpr_init[1], &fpr_final[1]);
				1063	break;
				1064	case 1:
				1065	mem2float_integer(&fpr_init[0], &fpr_final[0]);
				1066	mem2float_integer(&fpr_init[1], &fpr_final[1]);
				1067	break;
				1068	case 2:
				1069	mem2float_single(&fpr_init[0], &fpr_final[0]);
				1070	mem2float_single(&fpr_init[1], &fpr_final[1]);
				1071	break;
				1072	case 3:
				1073	mem2float_double(&fpr_init[0], &fpr_final[0]);
				1074	mem2float_double(&fpr_init[1], &fpr_final[1]);
				1075	break;
				1076	}
				1077	DDUMP("fpr_final =", &fpr_final, 2*len);
				1078	/*
				1079	* XXX fixme
				1080	*
				1081	* A possible optimization would be to drop fpr_final and directly
				1082	* use the storage from the saved context i.e., the actual final
				1083	* destination (pt_regs, switch_stack or thread structure).
				1084	*/
				1085	setfpreg(ld.r1, &fpr_final[0], regs);
				1086	setfpreg(ld.imm, &fpr_final[1], regs);
				1087	}
				1088
				1089	/*
				1090	* Check for updates: only immediate updates are available for this
				1091	* instruction.
				1092	*/
				1093	if (ld.m) {
				1094	/*
				1095	* the immediate is implicit given the ldsz of the operation:
				1096	* single: 8 (2x4) and for all others it's 16 (2x8)
				1097	*/
				1098	ifa += len<<1;
				1099
				1100	/*
				1101	* IMPORTANT:
				1102	* the fact that we force the NaT of r3 to zero is ONLY valid
				1103	* as long as we don't come here with a ldfpX.s.
				1104	* For this reason we keep this sanity check
				1105	*/
				1106	if (ld.x6_op == 1 \|\| ld.x6_op == 3)
				1107	printk(KERN_ERR "%s: register update on speculative load pair, error\n",
Harvey Harrison	d4ed808	2008-03-04 15:15:00 -0800	[diff] [blame]	1108	__func__);
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1109
				1110	setreg(ld.r3, ifa, 0, regs);
				1111	}
				1112
				1113	/*
				1114	* Invalidate ALAT entries, if any, for both registers.
				1115	*/
				1116	if (ld.x6_op == 0x2) {
				1117	invala_fr(ld.r1);
				1118	invala_fr(ld.imm);
				1119	}
				1120	return 0;
				1121	}
				1122
				1123
				1124	static int
				1125	emulate_load_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1126	{
				1127	struct ia64_fpreg fpr_init;
				1128	struct ia64_fpreg fpr_final;
				1129	unsigned long len = float_fsz[ld.x6_sz];
				1130
				1131	/*
				1132	* fr0 & fr1 don't need to be checked because Illegal Instruction
				1133	* faults have higher priority than unaligned faults.
				1134	*
				1135	* r0 cannot be found as the base as it would never generate an
				1136	* unaligned reference.
				1137	*/
				1138
				1139	/*
				1140	* make sure we get clean buffers
				1141	*/
				1142	memset(&fpr_init,0, sizeof(fpr_init));
				1143	memset(&fpr_final,0, sizeof(fpr_final));
				1144
				1145	/*
				1146	* ldfX.a we don't try to emulate anything but we must
				1147	* invalidate the ALAT entry.
				1148	* See comments in ldX for descriptions on how the various loads are handled.
				1149	*/
				1150	if (ld.x6_op != 0x2) {
				1151	if (copy_from_user(&fpr_init, (void __user *) ifa, len))
				1152	return -1;
				1153
				1154	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1155	DDUMP("fpr_init =", &fpr_init, len);
				1156	/*
				1157	* we only do something for x6_op={0,8,9}
				1158	*/
				1159	switch( ld.x6_sz ) {
				1160	case 0:
				1161	mem2float_extended(&fpr_init, &fpr_final);
				1162	break;
				1163	case 1:
				1164	mem2float_integer(&fpr_init, &fpr_final);
				1165	break;
				1166	case 2:
				1167	mem2float_single(&fpr_init, &fpr_final);
				1168	break;
				1169	case 3:
				1170	mem2float_double(&fpr_init, &fpr_final);
				1171	break;
				1172	}
				1173	DDUMP("fpr_final =", &fpr_final, len);
				1174	/*
				1175	* XXX fixme
				1176	*
				1177	* A possible optimization would be to drop fpr_final and directly
				1178	* use the storage from the saved context i.e., the actual final
				1179	* destination (pt_regs, switch_stack or thread structure).
				1180	*/
				1181	setfpreg(ld.r1, &fpr_final, regs);
				1182	}
				1183
				1184	/*
				1185	* check for updates on any loads
				1186	*/
				1187	if (ld.op == 0x7 \|\| ld.m)
				1188	emulate_load_updates(ld.op == 0x7 ? UPD_IMMEDIATE: UPD_REG, ld, regs, ifa);
				1189
				1190	/*
				1191	* invalidate ALAT entry in case of advanced floating point loads
				1192	*/
				1193	if (ld.x6_op == 0x2)
				1194	invala_fr(ld.r1);
				1195
				1196	return 0;
				1197	}
				1198
				1199
				1200	static int
				1201	emulate_store_float (unsigned long ifa, load_store_t ld, struct pt_regs *regs)
				1202	{
				1203	struct ia64_fpreg fpr_init;
				1204	struct ia64_fpreg fpr_final;
				1205	unsigned long len = float_fsz[ld.x6_sz];
				1206
				1207	/*
				1208	* make sure we get clean buffers
				1209	*/
				1210	memset(&fpr_init,0, sizeof(fpr_init));
				1211	memset(&fpr_final,0, sizeof(fpr_final));
				1212
				1213	/*
				1214	* if we get to this handler, Nat bits on both r3 and r2 have already
				1215	* been checked. so we don't need to do it
				1216	*
				1217	* extract the value to be stored
				1218	*/
				1219	getfpreg(ld.imm, &fpr_init, regs);
				1220	/*
				1221	* during this step, we extract the spilled registers from the saved
				1222	* context i.e., we refill. Then we store (no spill) to temporary
				1223	* aligned location
				1224	*/
				1225	switch( ld.x6_sz ) {
				1226	case 0:
				1227	float2mem_extended(&fpr_init, &fpr_final);
				1228	break;
				1229	case 1:
				1230	float2mem_integer(&fpr_init, &fpr_final);
				1231	break;
				1232	case 2:
				1233	float2mem_single(&fpr_init, &fpr_final);
				1234	break;
				1235	case 3:
				1236	float2mem_double(&fpr_init, &fpr_final);
				1237	break;
				1238	}
				1239	DPRINT("ld.r1=%d x6_sz=%d\n", ld.r1, ld.x6_sz);
				1240	DDUMP("fpr_init =", &fpr_init, len);
				1241	DDUMP("fpr_final =", &fpr_final, len);
				1242
				1243	if (copy_to_user((void __user *) ifa, &fpr_final, len))
				1244	return -1;
				1245
				1246	/*
				1247	* stfX [r3]=r2,imm(9)
				1248	*
				1249	* NOTE:
				1250	* ld.r3 can never be r0, because r0 would not generate an
				1251	* unaligned access.
				1252	*/
				1253	if (ld.op == 0x7) {
				1254	unsigned long imm;
				1255
				1256	/*
				1257	* form imm9: [12:6] contain first 7bits
				1258	*/
				1259	imm = ld.x << 7 \| ld.r1;
				1260	/*
				1261	* sign extend (8bits) if m set
				1262	*/
				1263	if (ld.m)
				1264	imm \|= SIGN_EXT9;
				1265	/*
				1266	* ifa == r3 (NaT is necessarily cleared)
				1267	*/
				1268	ifa += imm;
				1269
				1270	DPRINT("imm=%lx r3=%lx\n", imm, ifa);
				1271
				1272	setreg(ld.r3, ifa, 0, regs);
				1273	}
				1274	/*
				1275	* we don't have alat_invalidate_multiple() so we need
				1276	* to do the complete flush :-<<
				1277	*/
				1278	ia64_invala();
				1279
				1280	return 0;
				1281	}
				1282
				1283	/*
				1284	* Make sure we log the unaligned access, so that user/sysadmin can notice it and
				1285	* eventually fix the program. However, we don't want to do that for every access so we
				1286	* pace it with jiffies. This isn't really MP-safe, but it doesn't really have to be
				1287	* either...
				1288	*/
				1289	static int
				1290	within_logging_rate_limit (void)
				1291	{
				1292	static unsigned long count, last_time;
				1293
S.Caglar Onur	5cf1f7c	2008-03-28 14:27:05 -0700	[diff] [blame]	1294	if (time_after(jiffies, last_time + 5 * HZ))
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1295	count = 0;
Jack Steiner	79c83bd	2006-01-24 16:32:11 -0600	[diff] [blame]	1296	if (count < 5) {
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1297	last_time = jiffies;
Jack Steiner	79c83bd	2006-01-24 16:32:11 -0600	[diff] [blame]	1298	count++;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1299	return 1;
				1300	}
				1301	return 0;
				1302
				1303	}
				1304
				1305	void
				1306	ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs)
				1307	{
				1308	struct ia64_psr *ipsr = ia64_psr(regs);
				1309	mm_segment_t old_fs = get_fs();
				1310	unsigned long bundle[2];
				1311	unsigned long opcode;
				1312	struct siginfo si;
				1313	const struct exception_table_entry *eh = NULL;
				1314	union {
				1315	unsigned long l;
				1316	load_store_t insn;
				1317	} u;
				1318	int ret = -1;
				1319
				1320	if (ia64_psr(regs)->be) {
				1321	/* we don't support big-endian accesses */
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	1322	if (die_if_kernel("big-endian unaligned accesses are not supported", regs, 0))
				1323	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1324	goto force_sigbus;
				1325	}
				1326
				1327	/*
				1328	* Treat kernel accesses for which there is an exception handler entry the same as
				1329	* user-level unaligned accesses. Otherwise, a clever program could trick this
				1330	* handler into reading an arbitrary kernel addresses...
				1331	*/
				1332	if (!user_mode(regs))
				1333	eh = search_exception_tables(regs->cr_iip + ia64_psr(regs)->ri);
				1334	if (user_mode(regs) \|\| eh) {
				1335	if ((current->thread.flags & IA64_THREAD_UAC_SIGBUS) != 0)
				1336	goto force_sigbus;
				1337
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1338	if (!no_unaligned_warning &&
				1339	!(current->thread.flags & IA64_THREAD_UAC_NOPRINT) &&
				1340	within_logging_rate_limit())
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1341	{
				1342	char buf[200]; /* comm[] is at most 16 bytes... */
				1343	size_t len;
				1344
				1345	len = sprintf(buf, "%s(%d): unaligned access to 0x%016lx, "
Alexey Dobriyan	19c5870	2007-10-18 23:40:41 -0700	[diff] [blame]	1346	"ip=0x%016lx\n\r", current->comm,
				1347	task_pid_nr(current),
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1348	ifa, regs->cr_iip + ipsr->ri);
				1349	/*
				1350	* Don't call tty_write_message() if we're in the kernel; we might
				1351	* be holding locks...
				1352	*/
				1353	if (user_mode(regs))
				1354	tty_write_message(current->signal->tty, buf);
				1355	buf[len-1] = '\0'; /* drop '\r' */
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1356	/* watch for command names containing %s */
				1357	printk(KERN_WARNING "%s", buf);
				1358	} else {
				1359	if (no_unaligned_warning && !noprint_warning) {
				1360	noprint_warning = 1;
				1361	printk(KERN_WARNING "%s(%d) encountered an "
				1362	"unaligned exception which required\n"
				1363	"kernel assistance, which degrades "
				1364	"the performance of the application.\n"
				1365	"Unaligned exception warnings have "
				1366	"been disabled by the system "
				1367	"administrator\n"
				1368	"echo 0 > /proc/sys/kernel/ignore-"
				1369	"unaligned-usertrap to re-enable\n",
Alexey Dobriyan	19c5870	2007-10-18 23:40:41 -0700	[diff] [blame]	1370	current->comm, task_pid_nr(current));
Jes Sorensen	d2b176e	2006-02-28 09:42:23 -0800	[diff] [blame]	1371	}
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1372	}
				1373	} else {
				1374	if (within_logging_rate_limit())
				1375	printk(KERN_WARNING "kernel unaligned access to 0x%016lx, ip=0x%016lx\n",
				1376	ifa, regs->cr_iip + ipsr->ri);
				1377	set_fs(KERNEL_DS);
				1378	}
				1379
				1380	DPRINT("iip=%lx ifa=%lx isr=%lx (ei=%d, sp=%d)\n",
				1381	regs->cr_iip, ifa, regs->cr_ipsr, ipsr->ri, ipsr->it);
				1382
				1383	if (__copy_from_user(bundle, (void __user *) regs->cr_iip, 16))
				1384	goto failure;
				1385
				1386	/*
				1387	* extract the instruction from the bundle given the slot number
				1388	*/
				1389	switch (ipsr->ri) {
				1390	case 0: u.l = (bundle[0] >> 5); break;
				1391	case 1: u.l = (bundle[0] >> 46) \| (bundle[1] << 18); break;
				1392	case 2: u.l = (bundle[1] >> 23); break;
				1393	}
				1394	opcode = (u.l >> IA64_OPCODE_SHIFT) & IA64_OPCODE_MASK;
				1395
				1396	DPRINT("opcode=%lx ld.qp=%d ld.r1=%d ld.imm=%d ld.r3=%d ld.x=%d ld.hint=%d "
				1397	"ld.x6=0x%x ld.m=%d ld.op=%d\n", opcode, u.insn.qp, u.insn.r1, u.insn.imm,
				1398	u.insn.r3, u.insn.x, u.insn.hint, u.insn.x6_sz, u.insn.m, u.insn.op);
				1399
				1400	/*
				1401	* IMPORTANT:
				1402	* Notice that the switch statement DOES not cover all possible instructions
				1403	* that DO generate unaligned references. This is made on purpose because for some
				1404	* instructions it DOES NOT make sense to try and emulate the access. Sometimes it
				1405	* is WRONG to try and emulate. Here is a list of instruction we don't emulate i.e.,
				1406	* the program will get a signal and die:
				1407	*
				1408	* load/store:
				1409	* - ldX.spill
				1410	* - stX.spill
				1411	* Reason: RNATs are based on addresses
				1412	* - ld16
				1413	* - st16
				1414	* Reason: ld16 and st16 are supposed to occur in a single
				1415	* memory op
				1416	*
				1417	* synchronization:
				1418	* - cmpxchg
				1419	* - fetchadd
				1420	* - xchg
				1421	* Reason: ATOMIC operations cannot be emulated properly using multiple
				1422	* instructions.
				1423	*
				1424	* speculative loads:
				1425	* - ldX.sZ
				1426	* Reason: side effects, code must be ready to deal with failure so simpler
				1427	* to let the load fail.
				1428	* ---------------------------------------------------------------------------------
				1429	* XXX fixme
				1430	*
				1431	* I would like to get rid of this switch case and do something
				1432	* more elegant.
				1433	*/
				1434	switch (opcode) {
				1435	case LDS_OP:
				1436	case LDSA_OP:
				1437	if (u.insn.x)
				1438	/* oops, really a semaphore op (cmpxchg, etc) */
				1439	goto failure;
				1440	/* no break */
				1441	case LDS_IMM_OP:
				1442	case LDSA_IMM_OP:
				1443	case LDFS_OP:
				1444	case LDFSA_OP:
				1445	case LDFS_IMM_OP:
				1446	/*
				1447	* The instruction will be retried with deferred exceptions turned on, and
				1448	* we should get Nat bit installed
				1449	*
				1450	* IMPORTANT: When PSR_ED is set, the register & immediate update forms
				1451	* are actually executed even though the operation failed. So we don't
				1452	* need to take care of this.
				1453	*/
				1454	DPRINT("forcing PSR_ED\n");
				1455	regs->cr_ipsr \|= IA64_PSR_ED;
				1456	goto done;
				1457
				1458	case LD_OP:
				1459	case LDA_OP:
				1460	case LDBIAS_OP:
				1461	case LDACQ_OP:
				1462	case LDCCLR_OP:
				1463	case LDCNC_OP:
				1464	case LDCCLRACQ_OP:
				1465	if (u.insn.x)
				1466	/* oops, really a semaphore op (cmpxchg, etc) */
				1467	goto failure;
				1468	/* no break */
				1469	case LD_IMM_OP:
				1470	case LDA_IMM_OP:
				1471	case LDBIAS_IMM_OP:
				1472	case LDACQ_IMM_OP:
				1473	case LDCCLR_IMM_OP:
				1474	case LDCNC_IMM_OP:
				1475	case LDCCLRACQ_IMM_OP:
				1476	ret = emulate_load_int(ifa, u.insn, regs);
				1477	break;
				1478
				1479	case ST_OP:
				1480	case STREL_OP:
				1481	if (u.insn.x)
				1482	/* oops, really a semaphore op (cmpxchg, etc) */
				1483	goto failure;
				1484	/* no break */
				1485	case ST_IMM_OP:
				1486	case STREL_IMM_OP:
				1487	ret = emulate_store_int(ifa, u.insn, regs);
				1488	break;
				1489
				1490	case LDF_OP:
				1491	case LDFA_OP:
				1492	case LDFCCLR_OP:
				1493	case LDFCNC_OP:
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1494	if (u.insn.x)
				1495	ret = emulate_load_floatpair(ifa, u.insn, regs);
				1496	else
				1497	ret = emulate_load_float(ifa, u.insn, regs);
				1498	break;
				1499
Luck, Tony	1a49915	2008-01-14 09:59:24 -0800	[diff] [blame]	1500	case LDF_IMM_OP:
				1501	case LDFA_IMM_OP:
				1502	case LDFCCLR_IMM_OP:
				1503	case LDFCNC_IMM_OP:
				1504	ret = emulate_load_float(ifa, u.insn, regs);
				1505	break;
				1506
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1507	case STF_OP:
				1508	case STF_IMM_OP:
				1509	ret = emulate_store_float(ifa, u.insn, regs);
				1510	break;
				1511
				1512	default:
				1513	goto failure;
				1514	}
				1515	DPRINT("ret=%d\n", ret);
				1516	if (ret)
				1517	goto failure;
				1518
				1519	if (ipsr->ri == 2)
				1520	/*
				1521	* given today's architecture this case is not likely to happen because a
				1522	* memory access instruction (M) can never be in the last slot of a
				1523	* bundle. But let's keep it for now.
				1524	*/
				1525	regs->cr_iip += 16;
				1526	ipsr->ri = (ipsr->ri + 1) & 0x3;
				1527
				1528	DPRINT("ipsr->ri=%d iip=%lx\n", ipsr->ri, regs->cr_iip);
				1529	done:
				1530	set_fs(old_fs); /* restore original address limit */
				1531	return;
				1532
				1533	failure:
				1534	/* something went wrong... */
				1535	if (!user_mode(regs)) {
				1536	if (eh) {
				1537	ia64_handle_exception(regs, eh);
				1538	goto done;
				1539	}
Jan Beulich	620de2f	2008-02-04 23:43:03 -0800	[diff] [blame]	1540	if (die_if_kernel("error during unaligned kernel access\n", regs, ret))
				1541	return;
Linus Torvalds	1da177e	2005-04-16 15:20:36 -0700	[diff] [blame]	1542	/* NOT_REACHED */
				1543	}
				1544	force_sigbus:
				1545	si.si_signo = SIGBUS;
				1546	si.si_errno = 0;
				1547	si.si_code = BUS_ADRALN;
				1548	si.si_addr = (void __user *) ifa;
				1549	si.si_flags = 0;
				1550	si.si_isr = 0;
				1551	si.si_imm = 0;
				1552	force_sig_info(SIGBUS, &si, current);
				1553	goto done;
				1554	}