ARM: P2V: extend to 16-bit translation offsets

MSM's memory is aligned to 2MB, which is a finer granularity than our
existing method can handle: it patches only the upper 8 bits of the
translation offset, and so requires 16MB alignment.  Extend the offset
to 16 bits (64kB alignment) by patching two instructions instead of
one, automatically selected when MSM is enabled.

Acked-by: Tony Lindgren <tony@atomide.com>
Reviewed-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Tested-by: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk>
diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S
index 1db8ead..a94dd99 100644
--- a/arch/arm/kernel/head.S
+++ b/arch/arm/kernel/head.S
@@ -456,8 +456,13 @@
 	add	r4, r4, r3	@ adjust table start address
 	add	r5, r5, r3	@ adjust table end address
 	str	r8, [r7, r3]!	@ save computed PHYS_OFFSET to __pv_phys_offset
+#ifndef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
 	mov	r6, r3, lsr #24	@ constant for add/sub instructions
 	teq	r3, r6, lsl #24 @ must be 16MiB aligned
+#else
+	mov	r6, r3, lsr #16	@ constant for add/sub instructions
+	teq	r3, r6, lsl #16	@ must be 64kiB aligned
+#endif
 	bne	__error
 	str	r6, [r7, #4]	@ save to __pv_offset
 	b	__fixup_a_pv_table
@@ -471,10 +476,18 @@
 
 	.text
 __fixup_a_pv_table:
+#ifdef CONFIG_ARM_PATCH_PHYS_VIRT_16BIT
+	and	r0, r6, #255	@ offset bits 23-16
+	mov	r6, r6, lsr #8	@ offset bits 31-24
+#else
+	mov	r0, #0		@ just in case...
+#endif
 	b	3f
 2:	ldr	ip, [r7, r3]
 	bic	ip, ip, #0x000000ff
-	orr	ip, ip, r6
+	tst	ip, #0x400	@ rotate shift tells us LS or MS byte
+	orrne	ip, ip, r6	@ mask in offset bits 31-24
+	orreq	ip, ip, r0	@ mask in offset bits 23-16
 	str	ip, [r7, r3]
 3:	cmp	r4, r5
 	ldrcc	r7, [r4], #4	@ use branch for delay slot