gpu: msm2: Sync to upstream

* Sync to android-msm-3.4-flo-lollipop-release
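* Notable changes: 64-bit perfcounter reads via LO/HI register pairs,
  per-context power-level constraints (KGSL_PROP_PWR_CONSTRAINT),
  microcode bootstrap support, a helper for programming CP
  protected-mode registers, and a dynamically allocated IOMMU
  setstate command buffer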

Change-Id: Ia0df71c0a1f102de034860f4d212a05c806fef7d
diff --git a/drivers/gpu/msm2/Makefile b/drivers/gpu/msm2/Makefile
index da56a4c..dcfb4ed 100644
--- a/drivers/gpu/msm2/Makefile
+++ b/drivers/gpu/msm2/Makefile
@@ -25,8 +25,8 @@
 	adreno_dispatch.o \
 	adreno_postmortem.o \
 	adreno_snapshot.o \
-	adreno_trace.o \
 	adreno_coresight.o \
+	adreno_trace.o \
 	adreno_a2xx.o \
 	adreno_a2xx_trace.o \
 	adreno_a2xx_snapshot.o \
diff --git a/drivers/gpu/msm2/a2xx_reg.h b/drivers/gpu/msm2/a2xx_reg.h
index c70c4eb..b2fb99f 100644
--- a/drivers/gpu/msm2/a2xx_reg.h
+++ b/drivers/gpu/msm2/a2xx_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -252,7 +252,15 @@
 #define REG_CP_CSQ_IB1_STAT              0x01FE
 #define REG_CP_CSQ_IB2_STAT              0x01FF
 #define REG_CP_CSQ_RB_STAT               0x01FD
+
 #define REG_CP_DEBUG                     0x01FC
+/*
+ * CP DEBUG settings for a3xx and a2xx cores:
+ * DYNAMIC_CLK_DISABLE [27] - Turn off the dynamic clock control
+ * MIU_128BIT_WRITE_ENABLE [25] - Allow 128-bit writes to the VBIF
+ */
+#define A2XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
+
 #define REG_CP_IB1_BASE                  0x0458
 #define REG_CP_IB1_BUFSZ                 0x0459
 #define REG_CP_IB2_BASE                  0x045A
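
Not part of the patch: a quick standalone sanity check of the new default,
assuming only the bit positions documented in the comment above.

	#include <assert.h>
	#include <stdio.h>

	#define A2XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))

	int main(void)
	{
		/* DYNAMIC_CLK_DISABLE [27] | MIU_128BIT_WRITE_ENABLE [25] */
		assert(A2XX_CP_DEBUG_DEFAULT == 0x0A000000);
		printf("REG_CP_DEBUG default: 0x%08X\n", A2XX_CP_DEBUG_DEFAULT);
		return 0;
	}
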
diff --git a/drivers/gpu/msm2/a4xx_reg.h b/drivers/gpu/msm2/a4xx_reg.h
index 56147f7..53ed86d 100644
--- a/drivers/gpu/msm2/a4xx_reg.h
+++ b/drivers/gpu/msm2/a4xx_reg.h
@@ -24,17 +24,201 @@
 #define A4XX_RBBM_AHB_CTL1		0x24
 #define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL	0x2b
 #define A4XX_RBBM_INTERFACE_HANG_INT_CTL	0x2f
-#define A4XX_RBBM_AHB_ERROR_STATUS	0x18f
-#define A4XX_RBBM_STATUS		0x191
 #define A4XX_RBBM_INT_CLEAR_CMD		0x36
 #define A4XX_RBBM_INT_0_MASK		0x37
-#define A4XX_RBBM_INT_0_STATUS		0x17d
-#define A4XX_RBBM_PERFCTR_CTL		0x170
-#define A4XX_RBBM_PERFCTR_LOAD_CMD0	0x171
-#define A4XX_RBBM_PERFCTR_LOAD_CMD1	0x172
-#define A4XX_RBBM_PERFCTR_LOAD_CMD2	0x173
-#define A4XX_RBBM_GPU_BUSY_MASKED	0x17a
-#define A4XX_RBBM_PERFCTR_PWR_1_LO	0x168
+
+#define A4XX_RBBM_PERFCTR_CP_0_LO		0x9c
+#define A4XX_RBBM_PERFCTR_CP_0_HI		0x9d
+#define A4XX_RBBM_PERFCTR_CP_1_LO		0x9e
+#define A4XX_RBBM_PERFCTR_CP_1_HI		0x9f
+#define A4XX_RBBM_PERFCTR_RBBM_0_LO		0xac
+#define A4XX_RBBM_PERFCTR_RBBM_0_HI		0xad
+#define A4XX_RBBM_PERFCTR_RBBM_1_LO		0xae
+#define A4XX_RBBM_PERFCTR_RBBM_1_HI		0xaf
+#define A4XX_RBBM_PERFCTR_RBBM_2_LO		0xb0
+#define A4XX_RBBM_PERFCTR_RBBM_2_HI		0xb1
+#define A4XX_RBBM_PERFCTR_RBBM_3_LO		0xb2
+#define A4XX_RBBM_PERFCTR_RBBM_3_HI		0xb3
+#define A4XX_RBBM_PERFCTR_PC_0_LO		0xb4
+#define A4XX_RBBM_PERFCTR_PC_0_HI		0xb5
+#define A4XX_RBBM_PERFCTR_PC_1_LO		0xb6
+#define A4XX_RBBM_PERFCTR_PC_1_HI		0xb7
+#define A4XX_RBBM_PERFCTR_PC_2_LO		0xb8
+#define A4XX_RBBM_PERFCTR_PC_2_HI		0xb9
+#define A4XX_RBBM_PERFCTR_PC_3_LO		0xba
+#define A4XX_RBBM_PERFCTR_PC_3_HI		0xbb
+#define A4XX_RBBM_PERFCTR_PC_4_LO		0xbc
+#define A4XX_RBBM_PERFCTR_PC_4_HI		0xbd
+#define A4XX_RBBM_PERFCTR_PC_5_LO		0xbe
+#define A4XX_RBBM_PERFCTR_PC_5_HI		0xbf
+#define A4XX_RBBM_PERFCTR_PC_6_LO		0xc0
+#define A4XX_RBBM_PERFCTR_PC_6_HI		0xc1
+#define A4XX_RBBM_PERFCTR_PC_7_LO		0xc2
+#define A4XX_RBBM_PERFCTR_PC_7_HI		0xc3
+#define A4XX_RBBM_PERFCTR_VFD_0_LO		0xc4
+#define A4XX_RBBM_PERFCTR_VFD_0_HI		0xc5
+#define A4XX_RBBM_PERFCTR_VFD_1_LO		0xc6
+#define A4XX_RBBM_PERFCTR_VFD_1_HI		0xc7
+#define A4XX_RBBM_PERFCTR_VFD_2_LO		0xc8
+#define A4XX_RBBM_PERFCTR_VFD_2_HI		0xc9
+#define A4XX_RBBM_PERFCTR_VFD_3_LO		0xca
+#define A4XX_RBBM_PERFCTR_VFD_3_HI		0xcb
+#define A4XX_RBBM_PERFCTR_VFD_4_LO		0xcc
+#define A4XX_RBBM_PERFCTR_VFD_4_HI		0xcd
+#define A4XX_RBBM_PERFCTR_VFD_5_LO		0xce
+#define A4XX_RBBM_PERFCTR_VFD_5_HI		0xcf
+#define A4XX_RBBM_PERFCTR_VFD_6_LO		0xd0
+#define A4XX_RBBM_PERFCTR_VFD_6_HI		0xd1
+#define A4XX_RBBM_PERFCTR_VFD_7_LO		0xd2
+#define A4XX_RBBM_PERFCTR_VFD_7_HI		0xd3
+#define A4XX_RBBM_PERFCTR_HLSQ_0_LO		0xd4
+#define A4XX_RBBM_PERFCTR_HLSQ_0_HI		0xd5
+#define A4XX_RBBM_PERFCTR_HLSQ_1_LO		0xd6
+#define A4XX_RBBM_PERFCTR_HLSQ_1_HI		0xd7
+#define A4XX_RBBM_PERFCTR_HLSQ_2_LO		0xd8
+#define A4XX_RBBM_PERFCTR_HLSQ_2_HI		0xd9
+#define A4XX_RBBM_PERFCTR_HLSQ_3_LO		0xda
+#define A4XX_RBBM_PERFCTR_HLSQ_3_HI		0xdb
+#define A4XX_RBBM_PERFCTR_HLSQ_4_LO		0xdc
+#define A4XX_RBBM_PERFCTR_HLSQ_4_HI		0xdd
+#define A4XX_RBBM_PERFCTR_HLSQ_5_LO		0xde
+#define A4XX_RBBM_PERFCTR_HLSQ_5_HI		0xdf
+#define A4XX_RBBM_PERFCTR_HLSQ_6_LO		0xe0
+#define A4XX_RBBM_PERFCTR_HLSQ_6_HI		0xe1
+#define A4XX_RBBM_PERFCTR_HLSQ_7_LO		0xe2
+#define A4XX_RBBM_PERFCTR_HLSQ_7_HI		0xe3
+#define A4XX_RBBM_PERFCTR_VPC_0_LO		0xe4
+#define A4XX_RBBM_PERFCTR_VPC_0_HI		0xe5
+#define A4XX_RBBM_PERFCTR_VPC_1_LO		0xe6
+#define A4XX_RBBM_PERFCTR_VPC_1_HI		0xe7
+#define A4XX_RBBM_PERFCTR_VPC_2_LO		0xe8
+#define A4XX_RBBM_PERFCTR_VPC_2_HI		0xe9
+#define A4XX_RBBM_PERFCTR_VPC_3_LO		0xea
+#define A4XX_RBBM_PERFCTR_VPC_3_HI		0xeb
+#define A4XX_RBBM_PERFCTR_CCU_0_LO		0xec
+#define A4XX_RBBM_PERFCTR_CCU_0_HI		0xed
+#define A4XX_RBBM_PERFCTR_CCU_1_LO		0xee
+#define A4XX_RBBM_PERFCTR_CCU_1_HI		0xef
+#define A4XX_RBBM_PERFCTR_CCU_2_LO		0xf0
+#define A4XX_RBBM_PERFCTR_CCU_2_HI		0xf1
+#define A4XX_RBBM_PERFCTR_CCU_3_LO		0xf2
+#define A4XX_RBBM_PERFCTR_CCU_3_HI		0xf3
+#define A4XX_RBBM_PERFCTR_TSE_0_LO		0xf4
+#define A4XX_RBBM_PERFCTR_TSE_0_HI		0xf5
+#define A4XX_RBBM_PERFCTR_TSE_1_LO		0xf6
+#define A4XX_RBBM_PERFCTR_TSE_1_HI		0xf7
+#define A4XX_RBBM_PERFCTR_TSE_2_LO		0xf8
+#define A4XX_RBBM_PERFCTR_TSE_2_HI		0xf9
+#define A4XX_RBBM_PERFCTR_TSE_3_LO		0xfa
+#define A4XX_RBBM_PERFCTR_TSE_3_HI		0xfb
+#define A4XX_RBBM_PERFCTR_RAS_0_LO		0xfc
+#define A4XX_RBBM_PERFCTR_RAS_0_HI		0xfd
+#define A4XX_RBBM_PERFCTR_RAS_1_LO		0xfe
+#define A4XX_RBBM_PERFCTR_RAS_1_HI		0xff
+#define A4XX_RBBM_PERFCTR_RAS_2_LO		0x100
+#define A4XX_RBBM_PERFCTR_RAS_2_HI		0x101
+#define A4XX_RBBM_PERFCTR_RAS_3_LO		0x102
+#define A4XX_RBBM_PERFCTR_RAS_3_HI		0x103
+#define A4XX_RBBM_PERFCTR_UCHE_0_LO		0x104
+#define A4XX_RBBM_PERFCTR_UCHE_0_HI		0x105
+#define A4XX_RBBM_PERFCTR_UCHE_1_LO		0x106
+#define A4XX_RBBM_PERFCTR_UCHE_1_HI		0x107
+#define A4XX_RBBM_PERFCTR_UCHE_2_LO		0x108
+#define A4XX_RBBM_PERFCTR_UCHE_2_HI		0x109
+#define A4XX_RBBM_PERFCTR_UCHE_3_LO		0x10a
+#define A4XX_RBBM_PERFCTR_UCHE_3_HI		0x10b
+#define A4XX_RBBM_PERFCTR_UCHE_4_LO		0x10c
+#define A4XX_RBBM_PERFCTR_UCHE_4_HI		0x10d
+#define A4XX_RBBM_PERFCTR_UCHE_5_LO		0x10e
+#define A4XX_RBBM_PERFCTR_UCHE_5_HI		0x10f
+#define A4XX_RBBM_PERFCTR_UCHE_6_LO		0x110
+#define A4XX_RBBM_PERFCTR_UCHE_6_HI		0x111
+#define A4XX_RBBM_PERFCTR_UCHE_7_LO		0x112
+#define A4XX_RBBM_PERFCTR_UCHE_7_HI		0x113
+#define A4XX_RBBM_PERFCTR_TP_0_LO		0x114
+#define A4XX_RBBM_PERFCTR_TP_0_HI		0x115
+#define A4XX_RBBM_PERFCTR_TP_1_LO		0x116
+#define A4XX_RBBM_PERFCTR_TP_1_HI		0x117
+#define A4XX_RBBM_PERFCTR_TP_2_LO		0x118
+#define A4XX_RBBM_PERFCTR_TP_2_HI		0x119
+#define A4XX_RBBM_PERFCTR_TP_3_LO		0x11a
+#define A4XX_RBBM_PERFCTR_TP_3_HI		0x11b
+#define A4XX_RBBM_PERFCTR_TP_4_LO		0x11c
+#define A4XX_RBBM_PERFCTR_TP_4_HI		0x11d
+#define A4XX_RBBM_PERFCTR_TP_5_LO		0x11e
+#define A4XX_RBBM_PERFCTR_TP_5_HI		0x11f
+#define A4XX_RBBM_PERFCTR_TP_6_LO		0x120
+#define A4XX_RBBM_PERFCTR_TP_6_HI		0x121
+#define A4XX_RBBM_PERFCTR_TP_7_LO		0x122
+#define A4XX_RBBM_PERFCTR_TP_7_HI		0x123
+#define A4XX_RBBM_PERFCTR_SP_0_LO		0x124
+#define A4XX_RBBM_PERFCTR_SP_0_HI		0x125
+#define A4XX_RBBM_PERFCTR_SP_1_LO		0x126
+#define A4XX_RBBM_PERFCTR_SP_1_HI		0x127
+#define A4XX_RBBM_PERFCTR_SP_2_LO		0x128
+#define A4XX_RBBM_PERFCTR_SP_2_HI		0x129
+#define A4XX_RBBM_PERFCTR_SP_3_LO		0x12a
+#define A4XX_RBBM_PERFCTR_SP_3_HI		0x12b
+#define A4XX_RBBM_PERFCTR_SP_4_LO		0x12c
+#define A4XX_RBBM_PERFCTR_SP_4_HI		0x12d
+#define A4XX_RBBM_PERFCTR_SP_5_LO		0x12e
+#define A4XX_RBBM_PERFCTR_SP_5_HI		0x12f
+#define A4XX_RBBM_PERFCTR_SP_6_LO		0x130
+#define A4XX_RBBM_PERFCTR_SP_6_HI		0x131
+#define A4XX_RBBM_PERFCTR_SP_7_LO		0x132
+#define A4XX_RBBM_PERFCTR_SP_7_HI		0x133
+#define A4XX_RBBM_PERFCTR_SP_8_LO		0x134
+#define A4XX_RBBM_PERFCTR_SP_8_HI		0x135
+#define A4XX_RBBM_PERFCTR_SP_9_LO		0x136
+#define A4XX_RBBM_PERFCTR_SP_9_HI		0x137
+#define A4XX_RBBM_PERFCTR_SP_10_LO		0x138
+#define A4XX_RBBM_PERFCTR_SP_10_HI		0x139
+#define A4XX_RBBM_PERFCTR_SP_11_LO		0x13a
+#define A4XX_RBBM_PERFCTR_SP_11_HI		0x13b
+#define A4XX_RBBM_PERFCTR_RB_0_LO		0x13c
+#define A4XX_RBBM_PERFCTR_RB_0_HI		0x13d
+#define A4XX_RBBM_PERFCTR_RB_1_LO		0x13e
+#define A4XX_RBBM_PERFCTR_RB_1_HI		0x13f
+#define A4XX_RBBM_PERFCTR_RB_2_LO		0x140
+#define A4XX_RBBM_PERFCTR_RB_2_HI		0x141
+#define A4XX_RBBM_PERFCTR_RB_3_LO		0x142
+#define A4XX_RBBM_PERFCTR_RB_3_HI		0x143
+#define A4XX_RBBM_PERFCTR_RB_4_LO		0x144
+#define A4XX_RBBM_PERFCTR_RB_4_HI		0x145
+#define A4XX_RBBM_PERFCTR_RB_5_LO		0x146
+#define A4XX_RBBM_PERFCTR_RB_5_HI		0x147
+#define A4XX_RBBM_PERFCTR_RB_6_LO		0x148
+#define A4XX_RBBM_PERFCTR_RB_6_HI		0x149
+#define A4XX_RBBM_PERFCTR_RB_7_LO		0x14a
+#define A4XX_RBBM_PERFCTR_RB_7_HI		0x14b
+#define A4XX_RBBM_PERFCTR_VSC_0_LO		0x14c
+#define A4XX_RBBM_PERFCTR_VSC_0_HI		0x14d
+#define A4XX_RBBM_PERFCTR_VSC_1_LO		0x14e
+#define A4XX_RBBM_PERFCTR_VSC_1_HI		0x14f
+#define A4XX_RBBM_PERFCTR_PWR_0_LO		0x166
+#define A4XX_RBBM_PERFCTR_PWR_0_HI		0x167
+#define A4XX_RBBM_PERFCTR_PWR_1_LO		0x168
+#define A4XX_RBBM_PERFCTR_PWR_1_HI		0x169
+#define A4XX_RBBM_PERFCTR_CTL			0x170
+#define A4XX_RBBM_PERFCTR_LOAD_CMD0		0x171
+#define A4XX_RBBM_PERFCTR_LOAD_CMD1		0x172
+#define A4XX_RBBM_PERFCTR_LOAD_CMD2		0x173
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_0		0x176
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_1		0x177
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_2		0x178
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_3		0x179
+#define A4XX_RBBM_GPU_BUSY_MASKED		0x17a
+#define A4XX_RBBM_INT_0_STATUS			0x17d
+#define A4XX_RBBM_AHB_ERROR_STATUS		0x18f
+#define A4XX_RBBM_STATUS			0x191
+#define A4XX_RBBM_CFG_COUNTER0			0x1a2
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF0		0x1a9
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF1		0x1aa
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF2		0x1ab
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF3		0x1ac
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4		0x1ad
+#define A4XX_RBBM_CFG_DEBBUS_MISR0		0x1ae
+#define A4XX_RBBM_CFG_DEBBUS_MISR1		0x1af
 
 /* CP registers */
 #define A4XX_CP_SCRATCH_REG0		0x578
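
These A4XX performance counters, like their A3XX counterparts, are 64-bit
values split across consecutive LO/HI register pairs.  A minimal sketch of
assembling one (regread() and fake_regs[] are stand-ins for the driver's
real register accessors, used only to keep the example self-contained):

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t fake_regs[0x200];
	static uint32_t regread(uint32_t offset) { return fake_regs[offset]; }

	static uint64_t read_perfcounter64(uint32_t lo, uint32_t hi)
	{
		uint32_t hi1 = regread(hi);
		uint32_t lo_val = regread(lo);

		/* If HI rolled over between the two reads, re-read both;
		 * the real driver may handle this differently. */
		if (regread(hi) != hi1) {
			hi1 = regread(hi);
			lo_val = regread(lo);
		}

		return ((uint64_t)hi1 << 32) | lo_val;
	}

	int main(void)
	{
		/* A4XX_RBBM_PERFCTR_CP_0_LO/_HI from the list above */
		fake_regs[0x9c] = 0xdeadbeef;
		fake_regs[0x9d] = 0x1;
		printf("CP_0 = 0x%llx\n", (unsigned long long)
		       read_perfcounter64(0x9c, 0x9d));
		return 0;
	}
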
diff --git a/drivers/gpu/msm2/adreno.c b/drivers/gpu/msm2/adreno.c
index 9c9b761..45b6c0a 100644
--- a/drivers/gpu/msm2/adreno.c
+++ b/drivers/gpu/msm2/adreno.c
@@ -17,7 +17,6 @@
 #include <linux/sched.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/msm_kgsl.h>
 #include <linux/delay.h>
 #include <linux/of_coresight.h>
 
@@ -162,6 +161,12 @@
 	unsigned int pfp_jt_idx;
 	/* PFP jump table load addr */
 	unsigned int pfp_jt_addr;
+	/* PM4 bootstrap loader size */
+	unsigned int pm4_bstrp_size;
+	/* PFP bootstrap loader size */
+	unsigned int pfp_bstrp_size;
+	/* PFP bootstrap loader supported version */
+	unsigned int pfp_bstrp_ver;
 
 } adreno_gpulist[] = {
 	{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
@@ -199,7 +204,8 @@
 		512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
 	{ ADRENO_REV_A330, 3, 3, 0, ANY_ID,
 		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
-		512, 0, 2, SZ_1M, NO_VER, NO_VER, 0x8AD, 0x2E4, 0x201, 0x200 },
+		512, 0, 2, SZ_1M, NO_VER, NO_VER, 0x8AD, 0x2E4, 0x201, 0x200,
+		0x6, 0x20, 0x330020 },
 	{ ADRENO_REV_A305B, 3, 0, 5, 0x10,
 		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
 		512, 0, 2, SZ_128K, NO_VER, NO_VER, 0x8AD, 0x2E4,
@@ -419,12 +425,29 @@
 	return 0;
 }
 
+static inline void refcount_group(struct adreno_perfcount_group *group,
+	unsigned int reg, unsigned int flags,
+	unsigned int *lo, unsigned int *hi)
+{
+	if (flags & PERFCOUNTER_FLAG_KERNEL)
+		group->regs[reg].kernelcount++;
+	else
+		group->regs[reg].usercount++;
+
+	if (lo)
+		*lo = group->regs[reg].offset;
+
+	if (hi)
+		*hi = group->regs[reg].offset_hi;
+}
+
 /**
  * adreno_perfcounter_get: Try to put a countable in an available counter
  * @adreno_dev: Adreno device to configure
  * @groupid: Desired performance counter group
  * @countable: Countable desired to be in a counter
- * @offset: Return offset of the countable
+ * @offset: Return offset of the LO counter assigned
+ * @offset_hi: Return offset of the HI counter assigned
  * @flags: Used to setup kernel perf counters
  *
  * Try to place a countable in an available counter.  If the countable is
@@ -434,7 +457,7 @@
 
 int adreno_perfcounter_get(struct adreno_device *adreno_dev,
 	unsigned int groupid, unsigned int countable, unsigned int *offset,
-	unsigned int flags)
+	unsigned int *offset_hi, unsigned int flags)
 {
 	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
 	struct adreno_perfcount_group *group;
@@ -444,6 +467,8 @@
 	/* always clear return variables */
 	if (offset)
 		*offset = 0;
+	if (offset_hi)
+		*offset_hi = 0;
 
 	if (NULL == counters)
 		return -EINVAL;
@@ -455,22 +480,16 @@
 
 	/*
 	 * Check if the countable is already associated with a counter.
-	 * Refcount and return the offset, otherwise, try and find an empty
-	 * counter and assign the countable to it.
+	 * Refcount and return the offset, otherwise, try and find an
+	 * empty counter and assign the countable to it.
 	 */
+
 	for (i = 0; i < group->reg_count; i++) {
 		if (group->regs[i].countable == countable) {
-			/* Countable already associated with counter */
-			if (flags & PERFCOUNTER_FLAG_KERNEL)
-				group->regs[i].kernelcount++;
-			else
-				group->regs[i].usercount++;
-
-			if (offset)
-				*offset = group->regs[i].offset;
+			refcount_group(group, i, flags, offset, offset_hi);
 			return 0;
 		} else if (group->regs[i].countable ==
-			KGSL_PERFCOUNTER_NOT_USED) {
+					KGSL_PERFCOUNTER_NOT_USED) {
 			/* keep track of unused counter */
 			empty = i;
 		}
@@ -499,6 +518,8 @@
 
 	if (offset)
 		*offset = group->regs[empty].offset;
+	if (offset_hi)
+		*offset_hi = group->regs[empty].offset_hi;
 
 	return ret;
 }
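
With the new signature, kernel callers receive the offsets of both halves
of the assigned counter directly.  A usage sketch, mirroring the
fault-detection setup later in this patch:

	unsigned int lo, hi;

	/* Reserve SP_ALU_ACTIVE_CYCLES for the kernel; on success lo/hi
	 * hold the LO/HI register offsets of the assigned counter */
	int ret = adreno_perfcounter_get(adreno_dev,
			KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES,
			&lo, &hi, PERFCOUNTER_FLAG_KERNEL);
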
@@ -574,8 +595,6 @@
 
 	kgsl_mmu_unmap(pagetable, &rb->buffer_desc);
 
-	kgsl_mmu_unmap(pagetable, &rb->memptrs_desc);
-
 	kgsl_mmu_unmap(pagetable, &device->memstore);
 
 	kgsl_mmu_unmap(pagetable, &adreno_dev->pwron_fixup);
@@ -592,8 +611,11 @@
 
 	result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc);
 
-	if (!result)
-		result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc);
+	/*
+	 * ALERT: The order of these mappings is important:
+	 * map the most-used entries, like memstore and the
+	 * mmu setstate memory, first so they are picked up
+	 * by the TLB prefetcher.
+	 */
 
 	if (!result)
 		result = kgsl_mmu_map_global(pagetable, &device->memstore);
@@ -862,9 +884,7 @@
 					uint32_t flags)
 {
 	phys_addr_t pt_val;
-	unsigned int link[230];
-	unsigned int *cmds = &link[0];
-	int sizedwords = 0;
+	unsigned int *link, *cmds;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int num_iommu_units;
 	struct kgsl_context *context;
@@ -879,17 +899,18 @@
 	num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
 
 	context = kgsl_context_get(device, context_id);
-	if (context == NULL) {
-		kgsl_mmu_device_setstate(&device->mmu, KGSL_CONTEXT_INVALID);
-		return -EINVAL;
+	if (context)
+		adreno_ctx = ADRENO_CONTEXT(context);
+
+	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (link == NULL) {
+		result = -ENOMEM;
+		goto done;
 	}
 
-	adreno_ctx = ADRENO_CONTEXT(context);
+	cmds = link;
 
-	result = kgsl_mmu_enable_clk(&device->mmu,
-				KGSL_IOMMU_CONTEXT_USER);
-	if (result)
-		goto done;
+	kgsl_mmu_enable_clk(&device->mmu, KGSL_IOMMU_MAX_UNITS);
 
 	pt_val = kgsl_mmu_get_pt_base_addr(&device->mmu,
 				device->mmu.hwpagetable);
@@ -906,17 +927,11 @@
 		cmds += _adreno_iommu_setstate_v1(device, cmds, pt_val,
 						num_iommu_units, flags);
 
-	sizedwords += (cmds - &link[0]);
-	if (sizedwords == 0) {
-		KGSL_DRV_ERR(device, "no commands generated\n");
-		BUG();
-	}
 	/* invalidate all base pointers */
 	*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
 	*cmds++ = 0x7fff;
-	sizedwords += 2;
 
-	if (sizedwords > (sizeof(link)/sizeof(unsigned int))) {
+	if ((unsigned int) (cmds - link) > (PAGE_SIZE / sizeof(unsigned int))) {
 		KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
 		BUG();
 	}
@@ -924,13 +939,22 @@
 	 * This returns the per context timestamp but we need to
 	 * use the global timestamp for iommu clock disablement
 	 */
-	adreno_ringbuffer_issuecmds(device, adreno_ctx, KGSL_CMD_FLAGS_PMODE,
-			&link[0], sizedwords);
+	result = adreno_ringbuffer_issuecmds(device, adreno_ctx,
+			KGSL_CMD_FLAGS_PMODE, link,
+			(unsigned int)(cmds - link));
 
-	kgsl_mmu_disable_clk_on_ts(&device->mmu,
-		rb->global_ts, true);
+	/*
+	 * On error, disable the IOMMU clock right away; otherwise turn
+	 * it off after the command has been retired
+	 */
+	if (result)
+		kgsl_mmu_disable_clk(&device->mmu, KGSL_IOMMU_MAX_UNITS);
+	else
+		kgsl_mmu_disable_clk_on_ts(&device->mmu, rb->global_ts,
+						KGSL_IOMMU_MAX_UNITS);
 
 done:
+	kfree(link);
 	kgsl_context_put(context);
 	return result;
 }
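
The fixed 230-dword stack array is gone; commands are now built in a
page-sized heap buffer, and the overflow check derives the used size from
pointer arithmetic.  A standalone illustration of that pattern (not
driver code; PAGE_SIZE is assumed to be 4096 here):

	#include <stdio.h>
	#include <stdlib.h>

	#define PAGE_SIZE 4096

	int main(void)
	{
		unsigned int *link = malloc(PAGE_SIZE);
		unsigned int *cmds = link;

		if (link == NULL)
			return -1;

		*cmds++ = 0x7fff;	/* build commands by advancing cmds */

		/* Same check as the patch: dwords written vs. capacity */
		if ((unsigned int)(cmds - link) >
				PAGE_SIZE / sizeof(unsigned int))
			fprintf(stderr, "Temp command buffer overflow\n");

		printf("used %u of %zu dwords\n",
		       (unsigned int)(cmds - link),
		       PAGE_SIZE / sizeof(unsigned int));
		free(link);
		return 0;
	}
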
@@ -964,6 +988,7 @@
 		context = kgsl_context_get(device, context_id);
 		if (context == NULL)
 			return -EINVAL;
+
 		adreno_ctx = ADRENO_CONTEXT(context);
 
 		if (flags & KGSL_MMUFLAGS_PTUPDATE) {
@@ -1179,8 +1204,11 @@
 	adreno_dev->gmem_size = adreno_gpulist[i].gmem_size;
 	adreno_dev->pm4_jt_idx = adreno_gpulist[i].pm4_jt_idx;
 	adreno_dev->pm4_jt_addr = adreno_gpulist[i].pm4_jt_addr;
+	adreno_dev->pm4_bstrp_size = adreno_gpulist[i].pm4_bstrp_size;
 	adreno_dev->pfp_jt_idx = adreno_gpulist[i].pfp_jt_idx;
 	adreno_dev->pfp_jt_addr = adreno_gpulist[i].pfp_jt_addr;
+	adreno_dev->pfp_bstrp_size = adreno_gpulist[i].pfp_bstrp_size;
+	adreno_dev->pfp_bstrp_ver = adreno_gpulist[i].pfp_bstrp_ver;
 	adreno_dev->gpulist_index = i;
 	/*
 	 * Initialize uninitialzed gpu registers, only needs to be done once
@@ -1589,7 +1617,6 @@
 static int adreno_init(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 	int i;
 	int ret;
 
@@ -1637,8 +1664,6 @@
 		adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver))
 		device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC;
 
-	rb->global_ts = 0;
-
 	/* Initialize ft detection register offsets */
 	ft_detect_regs[0] = adreno_getreg(adreno_dev,
 						ADRENO_REG_RBBM_STATUS);
@@ -1667,7 +1692,6 @@
 	if (adreno_is_a330v2(adreno_dev))
 		adreno_a3xx_pwron_fixup_init(adreno_dev);
 
-	set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
 done:
 	return ret;
 }
@@ -1679,6 +1703,9 @@
 	unsigned int state = device->state;
 	unsigned int regulator_left_on = 0;
 
+	if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
+		return 0;
+
 	kgsl_cffdump_open(device);
 
 	kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
@@ -1688,7 +1715,7 @@
 				regulator_is_enabled(device->pwrctrl.gpu_cx)));
 
 	/* Clear any GPU faults that might have been left over */
-	adreno_set_gpu_fault(adreno_dev, 0);
+	adreno_clear_gpu_fault(adreno_dev);
 
 	/* Power up the device */
 	kgsl_pwrctrl_enable(device);
@@ -1734,7 +1761,7 @@
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
 	device->ftbl->irqctrl(device, 1);
 
-	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
+	status = adreno_ringbuffer_cold_start(&adreno_dev->ringbuffer);
 	if (status)
 		goto error_irq_off;
 
@@ -1747,6 +1774,8 @@
 
 	device->reset_counter++;
 
+	set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
 	return 0;
 
 error_rb_stop:
@@ -1790,6 +1819,8 @@
 
 	kgsl_cffdump_close(device);
 
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
 	return 0;
 }
 
@@ -1797,7 +1828,9 @@
  * adreno_reset() - Helper function to reset the GPU
  * @device: Pointer to the KGSL device structure for the GPU
  *
- * Helper function to reset the GPU hardware by toggling the footswitch
+ * Try to reset the GPU to recover from a fault.  First, try to do a low-latency
+ * soft reset.  If the soft reset fails for some reason, then bring out the big
+ * guns and toggle the footswitch.
  */
 int adreno_reset(struct kgsl_device *device)
 {
@@ -2201,12 +2234,70 @@
 	return status;
 }
 
-static int adreno_setproperty(struct kgsl_device *device,
+static int adreno_set_constraint(struct kgsl_device *device,
+				struct kgsl_context *context,
+				struct kgsl_device_constraint *constraint)
+{
+	int status = 0;
+
+	switch (constraint->type) {
+	case KGSL_CONSTRAINT_PWRLEVEL: {
+		struct kgsl_device_constraint_pwrlevel pwr;
+
+		if (constraint->size != sizeof(pwr)) {
+			status = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&pwr,
+				(void __user *)constraint->data,
+				sizeof(pwr))) {
+			status = -EFAULT;
+			break;
+		}
+		if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) {
+			status = -EINVAL;
+			break;
+		}
+
+		context->pwr_constraint.type =
+				KGSL_CONSTRAINT_PWRLEVEL;
+		context->pwr_constraint.sub_type = pwr.level;
+		trace_kgsl_user_pwrlevel_constraint(device,
+			context->id,
+			context->pwr_constraint.type,
+			context->pwr_constraint.sub_type);
+		}
+		break;
+	case KGSL_CONSTRAINT_NONE:
+		if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL)
+			trace_kgsl_user_pwrlevel_constraint(device,
+				context->id,
+				KGSL_CONSTRAINT_NONE,
+				context->pwr_constraint.sub_type);
+		context->pwr_constraint.type = KGSL_CONSTRAINT_NONE;
+		break;
+
+	default:
+		status = -EINVAL;
+		break;
+	}
+
+	/* If a new constraint has been set for a context, cancel the old one */
+	if ((status == 0) &&
+		(context->id == device->pwrctrl.constraint.owner_id))
+		device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE;
+
+	return status;
+}
+
+static int adreno_setproperty(struct kgsl_device_private *dev_priv,
 				enum kgsl_property_type type,
 				void *value,
 				unsigned int sizebytes)
 {
 	int status = -EINVAL;
+	struct kgsl_device *device = dev_priv->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	switch (type) {
@@ -2223,6 +2314,7 @@
 			}
 
 			if (enable) {
+				device->pwrctrl.ctrl_flags = 0;
 				adreno_dev->fast_hang_detect = 1;
 				kgsl_pwrscale_enable(device);
 			} else {
@@ -2235,6 +2327,28 @@
 			status = 0;
 		}
 		break;
+	case KGSL_PROP_PWR_CONSTRAINT: {
+			struct kgsl_device_constraint constraint;
+			struct kgsl_context *context;
+
+			if (sizebytes != sizeof(constraint))
+				break;
+
+			if (copy_from_user(&constraint, value,
+				sizeof(constraint))) {
+				status = -EFAULT;
+				break;
+			}
+
+			context = kgsl_context_get_owner(dev_priv,
+							constraint.context_id);
+			if (context == NULL)
+				break;
+			status = adreno_set_constraint(device, context,
+								&constraint);
+			kgsl_context_put(context);
+		}
+		break;
 	default:
 		break;
 	}
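
A hypothetical userspace sketch of driving the new property.  The struct
layouts and ioctl come from the matching msm_kgsl.h UAPI; treat the exact
field and constant names as assumptions if your header differs:

	#include <sys/ioctl.h>
	#include <linux/msm_kgsl.h>

	static int set_pwr_constraint(int fd, unsigned int ctx_id,
			unsigned int level)
	{
		struct kgsl_device_constraint_pwrlevel pwr = {
			/* must be < KGSL_CONSTRAINT_PWR_MAXLEVELS */
			.level = level,
		};
		struct kgsl_device_constraint constraint = {
			.type = KGSL_CONSTRAINT_PWRLEVEL,
			.context_id = ctx_id,
			.data = &pwr,
			.size = sizeof(pwr),
		};
		struct kgsl_device_getproperty prop = {
			.type = KGSL_PROP_PWR_CONSTRAINT,
			.value = &constraint,
			.sizebytes = sizeof(constraint),
		};

		return ioctl(fd, IOCTL_KGSL_SETPROPERTY, &prop);
	}
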
@@ -2258,7 +2372,6 @@
 	if (adreno_dev->gpudev->irq_pending(adreno_dev))
 		return false;
 
-	/* Read the correct RBBM status for the GPU type */
 	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS,
 		&reg_rbbm_status);
 
@@ -2291,6 +2404,9 @@
 		return -EINVAL;
 	}
 
+	if (adreno_dev->drawctxt_active)
+		kgsl_context_put(&adreno_dev->drawctxt_active->base);
+
 	adreno_dev->drawctxt_active = NULL;
 
 	/* Stop the ringbuffer */
@@ -2301,7 +2417,7 @@
 
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 
-	adreno_set_gpu_fault(adreno_dev, 0);
+	adreno_clear_gpu_fault(adreno_dev);
 
 	/* Delete the idle timer */
 	del_timer_sync(&device->idle_timer);
@@ -2327,7 +2443,7 @@
 	if (adreno_dev->pm4_jt_idx)
 		ret = adreno_ringbuffer_warm_start(&adreno_dev->ringbuffer);
 	else
-		ret = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
+		ret = adreno_ringbuffer_cold_start(&adreno_dev->ringbuffer);
 
 	if (ret)
 		return ret;
@@ -2337,7 +2453,7 @@
 	return 0;
 }
 
-/**
+/*
  * adreno_isidle() - return true if the GPU hardware is idle
  * @device: Pointer to the KGSL device structure for the GPU
  *
@@ -2349,8 +2465,7 @@
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	unsigned int rptr;
 
-	/* If the device isn't active, don't force it on. */
-	if (device->state != KGSL_STATE_ACTIVE)
+	if (!kgsl_pwrctrl_isenabled(device))
 		return true;
 
 	rptr = adreno_get_rptr(&adreno_dev->ringbuffer);
@@ -2383,7 +2498,7 @@
 	if (adreno_is_a3xx(adreno_dev) || adreno_is_a4xx(adreno_dev))
 		kgsl_cffdump_regpoll(device,
 			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
-		        0x00000000, 0x80000000);
+			0x00000000, 0x80000000);
 	else
 		kgsl_cffdump_regpoll(device,
 			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
@@ -2499,9 +2614,6 @@
 	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->buffer_desc, gpuaddr, size))
 		return &ringbuffer->buffer_desc;
 
-	if (kgsl_gpuaddr_in_memdesc(&ringbuffer->memptrs_desc, gpuaddr, size))
-		return &ringbuffer->memptrs_desc;
-
 	if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size))
 		return &device->memstore;
 
@@ -2647,7 +2759,7 @@
 		return -EINVAL;
 
 	ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context,
-		timestamp, msecs_to_jiffies(msecs));
+		timestamp, msecs);
 
 	/* If the context got invalidated then return a specific error */
 	drawctxt = ADRENO_CONTEXT(context);
@@ -2655,6 +2767,15 @@
 	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
 		ret = -EDEADLK;
 
+	/*
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked.  Userspace uses this as a marker for performing
+	 * post-fault activities
+	 */
+
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv))
+		ret = -EPROTO;
+
 	return ret;
 }
 
@@ -2728,7 +2849,8 @@
 		if (result)
 			break;
 		result = adreno_perfcounter_get(adreno_dev, get->groupid,
-			get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
+			get->countable, &get->offset, &get->offset_hi,
+			PERFCOUNTER_FLAG_NONE);
 		kgsl_active_count_put(device);
 		break;
 	}
diff --git a/drivers/gpu/msm2/adreno.h b/drivers/gpu/msm2/adreno.h
index 54503f2..23309a1 100644
--- a/drivers/gpu/msm2/adreno.h
+++ b/drivers/gpu/msm2/adreno.h
@@ -19,6 +19,8 @@
 #include "kgsl_iommu.h"
 #include <mach/ocmem.h>
 
+#include "a3xx_reg.h"
+
 #define DEVICE_3D_NAME "kgsl-3d"
 #define DEVICE_3D0_NAME "kgsl-3d0"
 
@@ -58,7 +60,6 @@
 #define ADRENO_DEFAULT_PWRSCALE_POLICY  NULL
 #endif
 
-void adreno_debugfs_init(struct kgsl_device *device);
 
 #define ADRENO_ISTORE_START 0x5000 /* Istore offset */
 
@@ -168,8 +169,11 @@
 	unsigned int wait_timeout;
 	unsigned int pm4_jt_idx;
 	unsigned int pm4_jt_addr;
+	unsigned int pm4_bstrp_size;
 	unsigned int pfp_jt_idx;
 	unsigned int pfp_jt_addr;
+	unsigned int pfp_bstrp_size;
+	unsigned int pfp_bstrp_ver;
 	unsigned int istore_size;
 	unsigned int pix_shader_start;
 	unsigned int instruction_size;
@@ -199,6 +203,7 @@
 	ADRENO_DEVICE_PWRON = 0,
 	ADRENO_DEVICE_PWRON_FIXUP = 1,
 	ADRENO_DEVICE_INITIALIZED = 2,
+	ADRENO_DEVICE_STARTED = 3,
 };
 
 #define PERFCOUNTER_FLAG_NONE 0x0
@@ -220,6 +225,7 @@
 	unsigned int kernelcount;
 	unsigned int usercount;
 	unsigned int offset;
+	unsigned int offset_hi;
 	int load_bit;
 	unsigned int select;
 };
@@ -307,6 +313,7 @@
 	ADRENO_REG_TC_CNTL_STATUS,
 	ADRENO_REG_TP0_CHICKEN,
 	ADRENO_REG_RBBM_RBBM_CTL,
+	ADRENO_REG_UCHE_INVALIDATE0,
 	ADRENO_REG_REGISTER_MAX,
 };
 
@@ -376,8 +383,8 @@
 	void (*coresight_disable) (struct kgsl_device *device);
 	void (*coresight_config_debug_reg) (struct kgsl_device *device,
 			int debug_reg, unsigned int val);
-	void (*postmortem_dump)(struct adreno_device *adreno_dev);
 	void (*soft_reset)(struct adreno_device *device);
+	void (*postmortem_dump)(struct adreno_device *adreno_dev);
 };
 
 #define FT_DETECT_REGS_COUNT 12
@@ -394,7 +401,9 @@
 #define  KGSL_FT_SKIPFRAME                3
 #define  KGSL_FT_DISABLE                  4
 #define  KGSL_FT_TEMP_DISABLE             5
-#define  KGSL_FT_DEFAULT_POLICY (BIT(KGSL_FT_REPLAY) + BIT(KGSL_FT_SKIPIB))
+#define  KGSL_FT_THROTTLE                 6
+#define  KGSL_FT_DEFAULT_POLICY (BIT(KGSL_FT_REPLAY) + BIT(KGSL_FT_SKIPIB) \
+				+ BIT(KGSL_FT_THROTTLE))
 
 /* This internal bit is used to skip the PM dump on replayed command batches */
 #define  KGSL_FT_SKIP_PMDUMP              31
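
For reference, assuming KGSL_FT_REPLAY = 1 and KGSL_FT_SKIPIB = 2
(consistent with the numbering shown above), the new default policy
evaluates to BIT(1) + BIT(2) + BIT(6) = 0x46: replay and skip-IB
recovery with throttling enabled.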
@@ -412,15 +421,8 @@
 	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
 	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
 	{ BIT(KGSL_FT_DISABLE), "disable" }, \
-	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
-
-#define ADRENO_FT_TYPES \
-	{ BIT(KGSL_FT_OFF), "off" }, \
-	{ BIT(KGSL_FT_REPLAY), "replay" }, \
-	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
-	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
-	{ BIT(KGSL_FT_DISABLE), "disable" }, \
-	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
+	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \
+	{ BIT(KGSL_FT_THROTTLE), "throttle"}
 
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
@@ -508,7 +510,7 @@
 
 int adreno_perfcounter_get(struct adreno_device *adreno_dev,
 	unsigned int groupid, unsigned int countable, unsigned int *offset,
-	unsigned int flags);
+	unsigned int *offset_hi, unsigned int flags);
 
 int adreno_perfcounter_put(struct adreno_device *adreno_dev,
 	unsigned int groupid, unsigned int countable, unsigned int flags);
@@ -713,6 +715,11 @@
 	*cmds++ = val;
 	*cmds++ = 0xFFFFFFFF;
 	*cmds++ = 0xFFFFFFFF;
+
+	/* WAIT_REG_MEM turns protected mode back on - turn it back off */
+	*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
 	cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
 	return cmds - start;
 }
@@ -728,13 +735,13 @@
 	unsigned int *start = cmds;
 
 	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
-	*cmds++ = 0x00000000;
+	*cmds++ = 0;
 
 	if ((adreno_dev->gpurev == ADRENO_REV_A305) ||
 		(adreno_dev->gpurev == ADRENO_REV_A305C) ||
 		(adreno_dev->gpurev == ADRENO_REV_A320)) {
 		*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
-		*cmds++ = 0x00000000;
+		*cmds++ = 0;
 	}
 
 	return cmds - start;
@@ -851,6 +858,19 @@
 	smp_wmb();
 }
 
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->dispatcher.fault, 0);
+	smp_wmb();
+}
+
 /*
  * adreno_vbif_start() - Program VBIF registers, called in device start
  * @device: Pointer to device whose vbif data is to be programmed
@@ -879,4 +899,75 @@
 	}
 }
 
+#ifdef CONFIG_DEBUG_FS
+void adreno_debugfs_init(struct kgsl_device *device);
+#else
+static inline void adreno_debugfs_init(struct kgsl_device *device) { }
+#endif
+
+/*
+ * adreno_bootstrap_ucode() - Checks if ucode bootstrapping is supported
+ * @adreno_dev:		Pointer to the adreno device
+ */
+static inline int adreno_bootstrap_ucode(struct adreno_device *adreno_dev)
+{
+	if ((adreno_dev->pfp_bstrp_size) && (adreno_dev->pm4_bstrp_size)
+		&& (adreno_dev->pfp_fw_version >= adreno_dev->pfp_bstrp_ver))
+		return 1;
+	else
+		return 0;
+}
+
+/**
+ * adreno_get_rptr() - Get the current ringbuffer read pointer
+ * @rb: Pointer to the ringbuffer to query
+ *
+ * Get the current read pointer from the GPU register.
+ */
+static inline unsigned int
+adreno_get_rptr(struct adreno_ringbuffer *rb)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
+	unsigned int result;
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &result);
+	return result;
+}
+/**
+ * adreno_set_protected_registers() - Protect the specified range of registers
+ * from being accessed by the GPU
+ * @device: pointer to the KGSL device
+ * @index: Pointer to the index of the protect mode register to write to
+ * @reg: Starting dword register to write
+ * @mask_len: Size of the mask to protect (# of registers = 2 ** mask_len)
+ *
+ * Add the range of registers to the list of protected mode registers that will
+ * cause an exception if the GPU accesses them.  There are 16 available
+ * protected mode registers.  Index is used to specify which register to write
+ * to - the intent is to call this function multiple times with the same index
+ * pointer for each range so that the protect registers are programmed
+ * incrementally
+ */
+static inline void adreno_set_protected_registers(struct kgsl_device *device,
+	unsigned int *index, unsigned int reg, int mask_len)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int val;
+
+	/* This function is only for adreno A3XX and beyond */
+	BUG_ON(adreno_is_a2xx(adreno_dev));
+
+	/* There are only 16 registers available */
+	BUG_ON(*index >= 16);
+
+	val = 0x60000000 | ((mask_len & 0x1F) << 24) | ((reg << 2) & 0x1FFFF);
+
+	/*
+	 * Write the protection range to the next available protection
+	 * register
+	 */
+
+	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_0 + *index, val);
+	*index = *index + 1;
+}
+
 #endif /*__ADRENO_H */
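
A worked example of the encoding in adreno_set_protected_registers(): the
call adreno_set_protected_registers(device, &index, 0x20, 2) made by
a3xx_protect_init() later in this patch yields

	val = 0x60000000 | (2 << 24) | (0x20 << 2) = 0x62000080

which protects 2^2 = 4 registers starting at dword 0x20 - the same value
the old code wrote verbatim to A3XX_CP_PROTECT_REG_1.
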
diff --git a/drivers/gpu/msm2/adreno_a2xx.c b/drivers/gpu/msm2/adreno_a2xx.c
index a5987af..ef641c1 100644
--- a/drivers/gpu/msm2/adreno_a2xx.c
+++ b/drivers/gpu/msm2/adreno_a2xx.c
@@ -655,7 +655,7 @@
 	unsigned int addr = shadow->gmemshadow.gpuaddr;
 	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Store TP0_CHICKEN register */
 		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 		*cmds++ = REG_TP0_CHICKEN;
@@ -864,7 +864,7 @@
 	unsigned int *cmds = shadow->gmem_restore_commands;
 	unsigned int *start = cmds;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Store TP0_CHICKEN register */
 		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 		*cmds++ = REG_TP0_CHICKEN;
@@ -1334,8 +1334,6 @@
 static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 			struct adreno_context *drawctxt)
 {
-	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
-
 	/* build indirect command buffers to save & restore regs/constants */
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_regsave_cmds(adreno_dev, drawctxt);
@@ -1354,16 +1352,14 @@
 	calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
 	tmp_ctx.gmem_base = adreno_dev->gmem_base;
 
-	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
+	result = kgsl_allocate(&(adreno_dev->dev),
+		&drawctxt->context_gmem_shadow.gmemshadow,
 		drawctxt->base.proc_priv->pagetable,
 		drawctxt->context_gmem_shadow.size);
 
 	if (result)
 		return result;
 
-	/* set the gmem shadow flag for the context */
-	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
-
 	/* blank out gmem shadow. */
 	kgsl_sharedmem_set(drawctxt->base.device,
 			&drawctxt->context_gmem_shadow.gmemshadow, 0, 0,
@@ -1374,7 +1370,7 @@
 		&tmp_ctx.cmd);
 
 	/* build TP0_CHICKEN register restore command buffer */
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE))
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE))
 		tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
 
 	/* build indirect command buffers to save & restore gmem */
@@ -1437,8 +1433,8 @@
 {
 	int ret;
 
-	if (drawctxt->flags & CTXT_FLAGS_PREAMBLE
-	   && drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC) {
+	if (drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE
+	   && drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC) {
 		drawctxt->ops = (adreno_is_a225(adreno_dev))
 			?  &a225_preamble_ctx_ops : &adreno_preamble_ctx_ops;
 
@@ -1455,7 +1451,7 @@
 	 * and texture and vertex buffer storage too
 	 */
 
-	ret = kgsl_allocate(&drawctxt->gpustate,
+	ret = kgsl_allocate(&(adreno_dev->dev), &drawctxt->gpustate,
 		drawctxt->base.proc_priv->pagetable, _context_size(adreno_dev));
 
 	if (ret)
@@ -1467,15 +1463,14 @@
 	tmp_ctx.cmd = tmp_ctx.start
 	    = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
 
-		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
 	}
 
-	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) {
 		ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
@@ -1555,7 +1550,7 @@
 	struct kgsl_device *device = &adreno_dev->dev;
 	int ret;
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->reg_save[1],
 			context->reg_save[2] << 2, true);
@@ -1567,7 +1562,7 @@
 		if (ret)
 			return ret;
 
-		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+		if (test_bit(ADRENO_CONTEXT_SHADER_SAVE, &context->priv)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_save[1],
@@ -1577,6 +1572,8 @@
 				KGSL_CMD_FLAGS_PMODE,
 				context->shader_save, 3);
 
+			if (ret)
+				return ret;
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_fixup[1],
@@ -1592,12 +1589,11 @@
 			if (ret)
 				return ret;
 
-			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+			set_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv);
 		}
 	}
 
-	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
-	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_SAVE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->context_gmem_shadow.gmem_save[1],
 			context->context_gmem_shadow.gmem_save[2] << 2, true);
@@ -1610,12 +1606,13 @@
 
 		if (ret)
 			return ret;
+
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->chicken_restore[1],
 			context->chicken_restore[2] << 2, true);
 
 		/* Restore TP0_CHICKEN */
-		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+		if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->chicken_restore, 3);
@@ -1625,7 +1622,7 @@
 		}
 		adreno_dev->gpudev->ctx_switches_since_last_draw = 0;
 
-		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
+		set_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	} else if (adreno_is_a2xx(adreno_dev))
 		return a2xx_drawctxt_draw_workaround(adreno_dev, context);
 
@@ -1646,7 +1643,7 @@
 	 *  restore gmem.
 	 *  (note: changes shader. shader must not already be restored.)
 	 */
-	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->context_gmem_shadow.gmem_restore[1],
 			context->context_gmem_shadow.gmem_restore[2] << 2,
@@ -1658,7 +1655,7 @@
 		if (ret)
 			return ret;
 
-		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+		if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->chicken_restore[1],
@@ -1671,11 +1668,10 @@
 			if (ret)
 				return ret;
 		}
-
-		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
+		clear_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->reg_restore[1],
 			context->reg_restore[2] << 2, true);
@@ -1687,7 +1683,7 @@
 			return ret;
 
 		/* restore shader instructions & partitioning. */
-		if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
+		if (test_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_restore[1],
@@ -2093,6 +2089,8 @@
 	kgsl_regwrite(device, REG_SQ_INT_CNTL, 0);
 
 	a2xx_gmeminit(adreno_dev);
+
+	kgsl_regwrite(device, REG_CP_DEBUG, A2XX_CP_DEBUG_DEFAULT);
 }
 
 static void a2xx_postmortem_dump(struct adreno_device *adreno_dev)
diff --git a/drivers/gpu/msm2/adreno_a3xx.c b/drivers/gpu/msm2/adreno_a3xx.c
index 3934226..65d1d06 100644
--- a/drivers/gpu/msm2/adreno_a3xx.c
+++ b/drivers/gpu/msm2/adreno_a3xx.c
@@ -2297,8 +2297,6 @@
 static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 				     struct adreno_context *drawctxt)
 {
-	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
-
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_constantrestore_cmds(adreno_dev, drawctxt);
 	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
@@ -2320,7 +2318,8 @@
 	calc_gmemsize(&drawctxt->context_gmem_shadow, adreno_dev->gmem_size);
 	tmp_ctx.gmem_base = adreno_dev->gmem_base;
 
-	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
+	result = kgsl_allocate(&(adreno_dev->dev),
+		&drawctxt->context_gmem_shadow.gmemshadow,
 		drawctxt->base.proc_priv->pagetable,
 		drawctxt->context_gmem_shadow.size);
 
@@ -2338,8 +2337,6 @@
 	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
 		KGSL_CACHE_OP_FLUSH);
 
-	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
-
 	return 0;
 }
 
@@ -2371,14 +2368,14 @@
 	 * Nothing to do here if the context is using preambles and doesn't need
 	 * GMEM save/restore
 	 */
-	if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) &&
-		(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) {
 		drawctxt->ops = &adreno_preamble_ctx_ops;
 		return 0;
 	}
 	drawctxt->ops = &a3xx_legacy_ctx_ops;
 
-	ret = kgsl_allocate(&drawctxt->gpustate,
+	ret = kgsl_allocate(&(adreno_dev->dev), &drawctxt->gpustate,
 		drawctxt->base.proc_priv->pagetable, CONTEXT_SIZE);
 
 	if (ret)
@@ -2388,15 +2385,15 @@
 			CONTEXT_SIZE);
 	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
 
-		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
+		set_bit(ADRENO_CONTEXT_SHADER_SAVE, &drawctxt->priv);
 	}
 
-	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC))
 		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
 
 done:
@@ -2415,7 +2412,7 @@
 	if (context->state == ADRENO_CONTEXT_STATE_INVALID)
 		return 0;
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Fixup self modifying IBs for save operations */
 		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);
@@ -2429,19 +2426,17 @@
 		if (ret)
 			return ret;
 
-		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+		if (test_bit(ADRENO_CONTEXT_SHADER_SAVE, &context->priv)) {
 			/* Save shader instructions */
 			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);
 			if (ret)
 				return ret;
-
-			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+			set_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv);
 		}
 	}
 
-	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
-	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_SAVE, &context->priv)) {
 		/*
 		 * Save GMEM (note: changes shader. shader must
 		 * already be saved.)
@@ -2459,7 +2454,7 @@
 		if (ret)
 			return ret;
 
-		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
+		set_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
 
 	return 0;
@@ -2481,7 +2476,7 @@
 	 * Shader must not already be restored.)
 	 */
 
-	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device,
 			&context->gpustate,
 			context->context_gmem_shadow.gmem_restore[1],
@@ -2494,10 +2489,10 @@
 					    gmem_restore, 3);
 		if (ret)
 			return ret;
-		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
+		clear_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);
 		if (ret)
@@ -2516,12 +2511,13 @@
 		if (ret)
 			return ret;
 
-		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
+		if (test_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv)) {
 			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->shader_restore, 3);
 			if (ret)
 				return ret;
+		}
 		/* Restore HLSQ_CONTROL_0 register */
 		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
@@ -3037,8 +3033,10 @@
 	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000001);
 	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000000);
 	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000000);
-	/* Protected mode control - turned off for A3XX/A4XX */
-	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000000);
+
+	/* Enable protected mode registers for A3XX */
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x20000000);
+
 	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000000);
 	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, 0x00000000);
 
@@ -3100,9 +3098,16 @@
 	case A3XX_INT_CP_HW_FAULT:
 		err = "ringbuffer hardware fault";
 		break;
-	case A3XX_INT_CP_REG_PROTECT_FAULT:
-		err = "ringbuffer protected mode error interrupt";
-		break;
+	case A3XX_INT_CP_REG_PROTECT_FAULT: {
+		unsigned int reg;
+		kgsl_regread(device, A3XX_CP_PROTECT_STATUS, &reg);
+
+		KGSL_DRV_CRIT(device,
+			"CP | Protected mode error| %s | addr=%x\n",
+			reg & (1 << 24) ? "WRITE" : "READ",
+			(reg & 0x1FFFF) >> 2);
+		goto done;
+	}
 	case A3XX_INT_CP_AHB_ERROR_HALT:
 		err = "ringbuffer AHB error interrupt";
 		break;
@@ -3137,7 +3142,6 @@
 
 	device->pwrctrl.irq_last = 1;
 	queue_work(device->work_queue, &device->ts_expired_ws);
-
 	adreno_dispatcher_schedule(device);
 }
 
@@ -3427,7 +3431,7 @@
 static struct {
 	void (*func)(struct adreno_device *, int);
 } a3xx_irq_funcs[] = {
-	A3XX_IRQ_CALLBACK(NULL),               /* 0 - RBBM_GPU_IDLE */
+	A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */
 	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
 	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 2 - RBBM_REG_TIMEOUT */
 	A3XX_IRQ_CALLBACK(a3xx_err_callback),  /* 3 - RBBM_ME_MS_TIMEOUT */
@@ -3679,141 +3683,158 @@
 
 static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO,
-		0, A3XX_CP_PERFCOUNTER_SELECT },
+		A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO,
-		1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO,
-		2, A3XX_RBBM_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO,
-		3, A3XX_PC_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO,
-		4, A3XX_PC_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO,
-		5, A3XX_PC_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO,
-		6, A3XX_PC_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO,
-		7, A3XX_VFD_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO,
-		8, A3XX_VFD_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO,
-		9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9,
+		A3XX_HLSQ_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO,
-		10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10,
+		A3XX_HLSQ_PERFCOUNTER1_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO,
-		11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11,
+		A3XX_HLSQ_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO,
-		12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12,
+		A3XX_HLSQ_PERFCOUNTER3_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO,
-		13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13,
+		A3XX_HLSQ_PERFCOUNTER4_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO,
-		14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
+		A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14,
+		A3XX_HLSQ_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO,
-		15, A3XX_VPC_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO,
-		16, A3XX_VPC_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO,
-		17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO,
-		18, A3XX_GRAS_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO,
-		19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO,
-		20, A3XX_GRAS_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO,
-		21, A3XX_UCHE_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_0_HI, 21,
+		A3XX_UCHE_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO,
-		22, A3XX_UCHE_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_1_HI, 22,
+		A3XX_UCHE_PERFCOUNTER1_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO,
-		23, A3XX_UCHE_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_2_HI, 23,
+		A3XX_UCHE_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO,
-		24, A3XX_UCHE_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_3_HI, 24,
+		A3XX_UCHE_PERFCOUNTER3_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO,
-		25, A3XX_UCHE_PERFCOUNTER4_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_4_HI, 25,
+		A3XX_UCHE_PERFCOUNTER4_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO,
-		26, A3XX_UCHE_PERFCOUNTER5_SELECT },
+		A3XX_RBBM_PERFCTR_UCHE_5_HI, 26,
+		A3XX_UCHE_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO,
-		27, A3XX_TP_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO,
-		28, A3XX_TP_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO,
-		29, A3XX_TP_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO,
-		30, A3XX_TP_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO,
-		31, A3XX_TP_PERFCOUNTER4_SELECT },
+		A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO,
-		32, A3XX_TP_PERFCOUNTER5_SELECT },
+		A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO,
-		33, A3XX_SP_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO,
-		34, A3XX_SP_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO,
-		35, A3XX_SP_PERFCOUNTER2_SELECT },
+		A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO,
-		36, A3XX_SP_PERFCOUNTER3_SELECT },
+		A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO,
-		37, A3XX_SP_PERFCOUNTER4_SELECT },
+		A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO,
-		38, A3XX_SP_PERFCOUNTER5_SELECT },
+		A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO,
-		39, A3XX_SP_PERFCOUNTER6_SELECT },
+		A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO,
-		40, A3XX_SP_PERFCOUNTER7_SELECT },
+		A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO,
-		41, A3XX_RB_PERFCOUNTER0_SELECT },
+		A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO,
-		42, A3XX_RB_PERFCOUNTER1_SELECT },
+		A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
-		-1, 0 },
+		A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
 	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO,
-		-1, 0 },
+		A3XX_RBBM_PERFCTR_PWR_1_HI, -1, 0 },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO, -1, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO,
+		A3XX_VBIF_PERF_CNT0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO,
+		A3XX_VBIF_PERF_CNT1_HI, -1, 0 },
 };
 static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO, -1, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO, -1, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO,
+		A3XX_VBIF_PERF_PWR_CNT0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO,
+		A3XX_VBIF_PERF_PWR_CNT1_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO,
+		A3XX_VBIF_PERF_PWR_CNT2_HI, -1, 0 },
 };
 
 static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
@@ -3879,35 +3900,43 @@
 	if (adreno_dev->fast_hang_detect) {
 		ret = adreno_perfcounter_get(adreno_dev,
 			KGSL_PERFCOUNTER_GROUP_SP,
-			SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6],
+			SP_ALU_ACTIVE_CYCLES,
+			&ft_detect_regs[6], &ft_detect_regs[7],
 			PERFCOUNTER_FLAG_KERNEL);
 		if (ret)
 			goto err;
-		ft_detect_regs[7] = ft_detect_regs[6] + 1;
 		ret = adreno_perfcounter_get(adreno_dev,
 			KGSL_PERFCOUNTER_GROUP_SP,
-			SP0_ICL1_MISSES, &ft_detect_regs[8],
+			SP0_ICL1_MISSES,
+			&ft_detect_regs[8], &ft_detect_regs[9],
 			PERFCOUNTER_FLAG_KERNEL);
 		if (ret)
 			goto err;
-		ft_detect_regs[9] = ft_detect_regs[8] + 1;
 		ret = adreno_perfcounter_get(adreno_dev,
 			KGSL_PERFCOUNTER_GROUP_SP,
-			SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10],
+			SP_FS_CFLOW_INSTRUCTIONS,
+			&ft_detect_regs[10], &ft_detect_regs[11],
 			PERFCOUNTER_FLAG_KERNEL);
 		if (ret)
 			goto err;
-		ft_detect_regs[11] = ft_detect_regs[10] + 1;
 	}
 
 	ret = adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
-		SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL);
+		SP_FS_FULL_ALU_INSTRUCTIONS, NULL, NULL,
+		PERFCOUNTER_FLAG_KERNEL);
 	if (ret)
 		goto err;
 
 	/* Reserve and start countable 1 in the PWR perfcounter group */
 	ret = adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
-			NULL, PERFCOUNTER_FLAG_KERNEL);
+			NULL, NULL, PERFCOUNTER_FLAG_KERNEL);
+	if (ret)
+		goto err;
+
+	/* VBIF waiting for RAM */
+	ret = adreno_perfcounter_get(adreno_dev,
+				KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0,
+				NULL, NULL, PERFCOUNTER_FLAG_KERNEL);
 	if (ret)
 		goto err;
 
@@ -3926,29 +3955,36 @@
  */
 static void a3xx_protect_init(struct kgsl_device *device)
 {
+	int index = 0;
+
 	/* enable access protection to privileged registers */
 	kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007);
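+
+	/*
+	 * Each call below is assumed to program the next CP_PROTECT slot
+	 * with a base register offset and a log2 count of registers to
+	 * protect.
+	 */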
 
 	/* RBBM registers */
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_3, 0x60000108);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400);
+	adreno_set_protected_registers(device, &index, 0x18, 0);
+	adreno_set_protected_registers(device, &index, 0x20, 2);
+	adreno_set_protected_registers(device, &index, 0x33, 0);
+	adreno_set_protected_registers(device, &index, 0x42, 0);
+	adreno_set_protected_registers(device, &index, 0x50, 4);
+	adreno_set_protected_registers(device, &index, 0x63, 0);
+	adreno_set_protected_registers(device, &index, 0x100, 4);
 
 	/* CP registers */
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178);
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180);
+	adreno_set_protected_registers(device, &index, 0x1C0, 5);
+	adreno_set_protected_registers(device, &index, 0x1EC, 1);
+	adreno_set_protected_registers(device, &index, 0x1F6, 1);
+	adreno_set_protected_registers(device, &index, 0x1F8, 2);
+	adreno_set_protected_registers(device, &index, 0x45E, 2);
+	adreno_set_protected_registers(device, &index, 0x460, 4);
 
 	/* RB registers */
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300);
+	adreno_set_protected_registers(device, &index, 0xCC0, 0);
 
 	/* VBIF registers */
-	kgsl_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000);
+	adreno_set_protected_registers(device, &index, 0x3000, 6);
+
+	/* SMMU registers */
+	adreno_set_protected_registers(device, &index, 0x4000, 14);
 }
 
 static void a3xx_start(struct adreno_device *adreno_dev)
@@ -4011,6 +4047,9 @@
 	/* Turn on the GPU busy counter and let it run free */
 
 	adreno_dev->gpu_cycles = 0;
+
+	/* the CP_DEBUG register offset and value are the same as on A2XX */
+	kgsl_regwrite(device, REG_CP_DEBUG, A2XX_CP_DEBUG_DEFAULT);
 }
 
 /**
@@ -4366,6 +4405,8 @@
 	ADRENO_REG_DEFINE(ADRENO_REG_TC_CNTL_STATUS, REG_TC_CNTL_STATUS),
 	ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, REG_TP0_CHICKEN),
 	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A3XX_RBBM_RBBM_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0,
+			A3XX_UCHE_CACHE_INVALIDATE0_REG),
 };
 
 const struct adreno_reg_offsets a3xx_reg_offsets = {
@@ -4392,6 +4433,6 @@
 	.coresight_enable = a3xx_coresight_enable,
 	.coresight_disable = a3xx_coresight_disable,
 	.coresight_config_debug_reg = a3xx_coresight_config_debug_reg,
-	.postmortem_dump = a3xx_postmortem_dump,
 	.soft_reset = a3xx_soft_reset,
+	.postmortem_dump = a3xx_postmortem_dump,
 };
diff --git a/drivers/gpu/msm2/adreno_a3xx_snapshot.c b/drivers/gpu/msm2/adreno_a3xx_snapshot.c
index ba0ef6a..9f5765d 100644
--- a/drivers/gpu/msm2/adreno_a3xx_snapshot.c
+++ b/drivers/gpu/msm2/adreno_a3xx_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -277,7 +277,6 @@
 
 	struct kgsl_snapshot_debugbus *header = snapshot;
 	struct debugbus_block *block = priv;
-	unsigned int val;
 	int i;
 	unsigned int *data = snapshot + sizeof(*header);
 	unsigned int dwords;
@@ -300,8 +299,6 @@
 		return 0;
 	}
 
-	val = (block->block_id << 8) | (1 << 16);
-
 	header->id = block->block_id;
 	header->count = dwords;
 
@@ -495,6 +492,22 @@
 	/* Reading these will hang the GPU if it isn't already hung */
 
 	if (hang) {
+		unsigned int reg;
+
+		/*
+		 * Reading the microcode while the CP is running will
+		 * basically move the CP instruction pointer to
+		 * whatever address we read. Big badaboom ensues. Stop the CP
+		 * (if it isn't already stopped) to ensure that we are safe.
+		 * We do this here and not earlier to avoid corrupting the RBBM
+		 * status and CP registers - by the time we get here we don't
+		 * care about the contents of the CP anymore.
+		 */
+
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
+		reg |= (1 << 27) | (1 << 28);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+
 		snapshot = kgsl_snapshot_add_section(device,
 			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
 			a3xx_snapshot_cp_pfp_ram, NULL);
diff --git a/drivers/gpu/msm2/adreno_a3xx_trace.h b/drivers/gpu/msm2/adreno_a3xx_trace.h
index d48faf4..75156e4 100644
--- a/drivers/gpu/msm2/adreno_a3xx_trace.h
+++ b/drivers/gpu/msm2/adreno_a3xx_trace.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -48,7 +48,7 @@
 		"d_name=%s status=%s",
 		__get_str(device_name),
 		__entry->status ? __print_flags(__entry->status, "|",
-			{ 1 << A3XX_INT_RBBM_AHB_ERROR, "RBBM_GPU_IDLE" },
+			{ 1 << A3XX_INT_RBBM_GPU_IDLE, "RBBM_GPU_IDLE" },
 			{ 1 << A3XX_INT_RBBM_AHB_ERROR, "RBBM_AHB_ERR" },
 			{ 1 << A3XX_INT_RBBM_REG_TIMEOUT, "RBBM_REG_TIMEOUT" },
 			{ 1 << A3XX_INT_RBBM_ME_MS_TIMEOUT,
diff --git a/drivers/gpu/msm2/adreno_debugfs.c b/drivers/gpu/msm2/adreno_debugfs.c
index 12804a3..a7d1b7f 100644
--- a/drivers/gpu/msm2/adreno_debugfs.c
+++ b/drivers/gpu/msm2/adreno_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -23,8 +23,6 @@
 
 #include "a2xx_reg.h"
 
-unsigned int kgsl_cff_dump_enable;
-
 DEFINE_SIMPLE_ATTRIBUTE(kgsl_cff_dump_enable_fops, kgsl_cff_dump_enable_get,
 			kgsl_cff_dump_enable_set, "%llu\n");
 
@@ -56,41 +54,6 @@
 		&adreno_dev->wait_timeout);
 	debugfs_create_u32("ib_check", 0644, device->d_debugfs,
 			   &adreno_dev->ib_check_level);
-	/* By Default enable fast hang detection */
-	adreno_dev->fast_hang_detect = 1;
-	debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs,
-			   &adreno_dev->fast_hang_detect);
-	/*
-	 * FT policy can be set to any of the options below.
-	 * KGSL_FT_OFF -> BIT(0) Set to turn off FT
-	 * KGSL_FT_REPLAY  -> BIT(1) Set to enable replay
-	 * KGSL_FT_SKIPIB  -> BIT(2) Set to skip IB
-	 * KGSL_FT_SKIPFRAME -> BIT(3) Set to skip frame
-	 * KGSL_FT_DISABLE -> BIT(4) Set to disable FT for faulting context
-	 * by default set FT policy to KGSL_FT_DEFAULT_POLICY
-	 */
-	adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
-	debugfs_create_u32("ft_policy", 0644, device->d_debugfs,
-			   &adreno_dev->ft_policy);
-	/* By default enable long IB detection */
-	adreno_dev->long_ib_detect = 1;
-	debugfs_create_u32("long_ib_detect", 0644, device->d_debugfs,
-			   &adreno_dev->long_ib_detect);
-
-	/*
-	 * FT pagefault policy can be set to any of the options below.
-	 * KGSL_FT_PAGEFAULT_INT_ENABLE -> BIT(0) set to enable pagefault INT
-	 * KGSL_FT_PAGEFAULT_GPUHALT_ENABLE  -> BIT(1) Set to enable GPU HALT on
-	 * pagefaults. This stalls the GPU on a pagefault on IOMMU v1 HW.
-	 * KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE  -> BIT(2) Set to log only one
-	 * pagefault per page.
-	 * KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT -> BIT(3) Set to log only one
-	 * pagefault per INT.
-	 */
-	 adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
-	 debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
-			&adreno_dev->ft_pf_policy);
-
 	debugfs_create_file("active_cnt", 0444, device->d_debugfs, device,
 			    &_active_count_fops);
 }
diff --git a/drivers/gpu/msm2/adreno_dispatch.c b/drivers/gpu/msm2/adreno_dispatch.c
index 357242f..ddf275c 100644
--- a/drivers/gpu/msm2/adreno_dispatch.c
+++ b/drivers/gpu/msm2/adreno_dispatch.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -33,6 +33,15 @@
 /* Number of command batches sent at a time from a single context */
 static unsigned int _context_cmdbatch_burst = 5;
 
+/*
+ * GFT throttle parameters. If GFT recovered more than
+ * X times in Y ms invalidate the context and do not attempt recovery.
+ * X -> _fault_throttle_burst
+ * Y -> _fault_throttle_time
+ */
+static unsigned int _fault_throttle_time = 3000;
+static unsigned int _fault_throttle_burst = 3;
+
 /* Number of command batches inflight in the ringbuffer at any time */
 static unsigned int _dispatcher_inflight = 15;
 
@@ -186,10 +195,9 @@
 		return -EINVAL;
 	}
 
-	prev = drawctxt->cmdqueue_head - 1;
-
-	if (prev < 0)
-		prev = ADRENO_CONTEXT_CMDQUEUE_SIZE - 1;
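+	/* cmdqueue_head is unsigned now, so handle ring wraparound explicitly */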
+	prev = drawctxt->cmdqueue_head == 0 ?
+		(ADRENO_CONTEXT_CMDQUEUE_SIZE - 1) :
+		(drawctxt->cmdqueue_head - 1);
 
 	/*
 	 * The maximum queue size always needs to be one less then the size of
@@ -225,7 +233,6 @@
 
 	spin_lock(&dispatcher->plist_lock);
 
-
 	if (plist_node_empty(&drawctxt->pending)) {
 		/* Get a reference to the context while it sits on the list */
 		if (_kgsl_context_get(&drawctxt->base)) {
@@ -350,9 +357,6 @@
 
 		cmdbatch = adreno_dispatcher_get_cmdbatch(drawctxt);
 
-		if (cmdbatch == NULL)
-			break;
-
 		/*
 		 * adreno_context_get_cmdbatch returns -EAGAIN if the current
 		 * cmdbatch has pending sync points so no more to do here.
@@ -360,8 +364,9 @@
-		 * reqeueued
+		 * requeued
 		 */
 
-		if (IS_ERR(cmdbatch) && PTR_ERR(cmdbatch) == -EAGAIN) {
-			requeued = 1;
+		if (IS_ERR_OR_NULL(cmdbatch)) {
+			if (IS_ERR(cmdbatch) && PTR_ERR(cmdbatch) == -EAGAIN)
+				requeued = 1;
 			break;
 		}
 
@@ -402,7 +407,7 @@
 	 */
 
 	if (count)
-		wake_up_interruptible_all(&drawctxt->wq);
+		wake_up_all(&drawctxt->wq);
 
 	/*
 	 * Return positive if the context submitted commands or if we figured
@@ -563,7 +568,7 @@
 		return 0;
 	}
 
-	if (drawctxt->flags & CTXT_FLAGS_USER_GENERATED_TS) {
+	if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) {
 		/*
 		 * User specified timestamps need to be greater than the last
 		 * issued timestamp in the context
@@ -580,7 +585,7 @@
 }
 
 /**
  * adreno_dispatcher_queue_cmd() - Queue a new command in the context
  * @adreno_dev: Pointer to the adreno device struct
  * @drawctxt: Pointer to the adreno draw context
  * @cmdbatch: Pointer to the command batch being submitted
@@ -607,10 +612,8 @@
 	 * to run (if it exists) regardless of the context state.
 	 */
 
-	if (drawctxt->flags & CTXT_FLAGS_FORCE_PREAMBLE) {
+	if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv))
 		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
-		drawctxt->flags &= ~CTXT_FLAGS_FORCE_PREAMBLE;
-	}
 
 	/*
 	 * If we are waiting for the end of frame and it hasn't appeared yet,
@@ -618,7 +621,7 @@
 	 * through the pipeline but it won't actually send any commands
 	 */
 
-	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+	if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv)) {
 		set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv);
 
 		/*
@@ -627,14 +630,13 @@
 		 */
 
 		if (cmdbatch->flags & KGSL_CONTEXT_END_OF_FRAME) {
-			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+			clear_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv);
 
 			/*
 			 * Force the preamble on the next command to ensure that
 			 * the state is correct
 			 */
-
-			drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE;
+			set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv);
 		}
 	}
 
@@ -680,10 +682,10 @@
 
 	/*
 	 * Set the fault tolerance policy for the command batch - assuming the
-	 * context hsn't disabled FT use the current device policy
+	 * context hasn't disabled FT use the current device policy
 	 */
 
-	if (drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE)
+	if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
 		set_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy);
 	else
 		cmdbatch->fault_policy = adreno_dev->ft_policy;
@@ -717,6 +719,44 @@
 	return 0;
 }
 
+static int _mark_context(int id, void *ptr, void *data)
+{
+	unsigned int guilty = *((unsigned int *) data);
+	struct kgsl_context *context = ptr;
+
+	/*
+	 * If the context is guilty mark it as such.  Otherwise mark it as
+	 * innocent if it had not already been marked as guilty.  If id is
+	 * passed as 0 then mark EVERYBODY guilty (recovery failed)
+	 */
+
+	if (guilty == 0 || guilty == context->id)
+		context->reset_status =
+			KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
+	else if (context->reset_status !=
+		KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT)
+		context->reset_status =
+			KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
+
+	return 0;
+}
+
+/**
+ * mark_guilty_context() - Mark the given context as guilty (failed recovery)
+ * @device: Pointer to a KGSL device structure
+ * @id: Context ID of the guilty context (or 0 to mark all as guilty)
+ *
+ * Mark the given (or all) context(s) as guilty (failed recovery)
+ */
+static void mark_guilty_context(struct kgsl_device *device, unsigned int id)
+{
+	/* Mark the status for all the contexts in the device */
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, _mark_context, &id);
+	read_unlock(&device->context_lock);
+}
+
 /*
  * If an IB inside of the command batch has a gpuaddr that matches the base
  * passed in then zero the size which effectively skips it when it is submitted
@@ -775,7 +815,7 @@
 	 */
 
 	if (skip && drawctxt)
-		drawctxt->flags |= CTXT_FLAGS_SKIP_EOF;
+		set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv);
 
 	/*
 	 * If we did see the EOF flag then force the preamble on for the
@@ -783,7 +823,7 @@
 	 */
 
 	if (!skip && drawctxt)
-		drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE;
+		set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv);
 }
 
 static void remove_invalidated_cmdbatches(struct kgsl_device *device,
@@ -950,6 +990,9 @@
 	if (replay == NULL) {
 		unsigned int ptr = dispatcher->head;
 
+		/* Recovery failed - mark everybody guilty */
+		mark_guilty_context(device, 0);
+
 		while (ptr != dispatcher->tail) {
 			struct kgsl_context *context =
 				dispatcher->cmdqueue[ptr]->context;
@@ -985,6 +1028,35 @@
 	cmdbatch = replay[0];
 
 	/*
+	 * If GFT recovered more than X times in Y ms invalidate the context
+	 * and do not attempt recovery.
+	 * Example: X==3 and Y==3000 ms, GPU hung at 500ms, 1700ms, 2500ms and
+	 * 3000ms for the same context, we will skip FT and invalidate the
+	 * context @3000ms because the context triggered GFT more than 3 times
+	 * in the last 3 seconds. If the 1st and 4th hangs of a context are
+	 * more than 3 seconds apart, GFT stays enabled and the context is not
+	 * invalidated.
+	 */
+	if (test_bit(KGSL_FT_THROTTLE, &cmdbatch->fault_policy)) {
+		if (time_after(jiffies, (cmdbatch->context->fault_time
+				+ msecs_to_jiffies(_fault_throttle_time)))) {
+			cmdbatch->context->fault_time = jiffies;
+			cmdbatch->context->fault_count = 1;
+		} else {
+			cmdbatch->context->fault_count++;
+			if (cmdbatch->context->fault_count >
+					_fault_throttle_burst) {
+				set_bit(KGSL_FT_DISABLE,
+						&cmdbatch->fault_policy);
+				pr_fault(device, cmdbatch,
+					 "gpu fault threshold exceeded %d faults in %d msecs\n",
+					 _fault_throttle_burst,
+					 _fault_throttle_time);
+			}
+		}
+	}
+
+	/*
 	 * If FT is disabled for this cmdbatch invalidate immediately
 	 */
 
@@ -993,6 +1065,7 @@
 		pr_fault(device, cmdbatch, "gpu skipped ctx %d ts %d\n",
 			cmdbatch->context->id, cmdbatch->timestamp);
 
+		mark_guilty_context(device, cmdbatch->context->id);
 		adreno_drawctxt_invalidate(device, cmdbatch->context);
 	}
 
@@ -1088,6 +1161,9 @@
 	pr_fault(device, cmdbatch, "gpu failed ctx %d ts %d\n",
 		cmdbatch->context->id, cmdbatch->timestamp);
 
+	/* Mark the context as failed */
+	mark_guilty_context(device, cmdbatch->context->id);
+
 	/* Invalidate the context */
 	adreno_drawctxt_invalidate(device, cmdbatch->context);
 
@@ -1148,6 +1224,9 @@
 				"gpu reset failed ctx %d ts %d\n",
 				replay[i]->context->id, replay[i]->timestamp);
 
+			/* Mark this context as guilty (failed recovery) */
+			mark_guilty_context(device, replay[i]->context->id);
+
 			adreno_drawctxt_invalidate(device, replay[i]->context);
 			remove_invalidated_cmdbatches(device, &replay[i],
 				count - i);
@@ -1204,6 +1283,7 @@
 		container_of(dispatcher, struct adreno_device, dispatcher);
 	struct kgsl_device *device = &adreno_dev->dev;
 	int count = 0;
+	int last_context = KGSL_CONTEXT_INVALID;
 	int fault_handled = 0;
 
 	mutex_lock(&dispatcher->mutex);
@@ -1235,12 +1315,22 @@
 			 * successful completion to the world
 			 */
 
-			if (cmdbatch->fault_recovery != 0)
+			if (cmdbatch->fault_recovery != 0) {
+				struct adreno_context *drawctxt =
+					ADRENO_CONTEXT(cmdbatch->context);
+
+				/* Mark the context as faulted and recovered */
+				set_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv);
+
 				_print_recovery(device, cmdbatch);
+			}
 
 			trace_adreno_cmdbatch_retired(cmdbatch,
 				dispatcher->inflight - 1);
 
+			/* Remember the last context that got retired */
+			last_context = cmdbatch->context->id;
+
 			/* Reduce the number of inflight command batches */
 			dispatcher->inflight--;
 
@@ -1289,7 +1379,7 @@
 		 */
 
 		if (!adreno_dev->long_ib_detect ||
-			drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE)
+			drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
 			break;
 
 		/*
@@ -1345,6 +1435,18 @@
 		/* Update the timeout timer for the next command batch */
 		mod_timer(&dispatcher->timer, cmdbatch->expires);
 
+		/*
+		 * if the context for the next pending cmdbatch is different
+		 * than the last one we retired, then trace it as a GPU switch
+		 */
+
+		if (cmdbatch->context->id != last_context) {
+			u64 now = ktime_to_ns(ktime_get());
+			kgsl_trace_gpu_sched_switch(device->name, now,
+				cmdbatch->context->id, cmdbatch->priority,
+				cmdbatch->timestamp);
+		}
+
 		/* There are still things in flight - update the idle counts */
 		mutex_lock(&device->mutex);
 		kgsl_pwrscale_idle(device);
@@ -1463,7 +1565,6 @@
  * adreno_dispatcher_start() - activate the dispatcher
  * @adreno_dev: pointer to the adreno device structure
  *
- * Set the disaptcher active and start the loop once to get things going
  */
 void adreno_dispatcher_start(struct kgsl_device *device)
 {
@@ -1577,6 +1678,10 @@
 static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait);
 static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0,
 	_fault_timer_interval);
+static DISPATCHER_UINT_ATTR(fault_throttle_time, 0644, 0,
+	_fault_throttle_time);
+static DISPATCHER_UINT_ATTR(fault_throttle_burst, 0644, 0,
+	_fault_throttle_burst);
 
 static struct attribute *dispatcher_attrs[] = {
 	&dispatcher_attr_inflight.attr,
@@ -1585,6 +1690,8 @@
 	&dispatcher_attr_cmdbatch_timeout.attr,
 	&dispatcher_attr_context_queue_wait.attr,
 	&dispatcher_attr_fault_detect_interval.attr,
+	&dispatcher_attr_fault_throttle_time.attr,
+	&dispatcher_attr_fault_throttle_burst.attr,
 	NULL,
 };
 
diff --git a/drivers/gpu/msm2/adreno_drawctxt.c b/drivers/gpu/msm2/adreno_drawctxt.c
index a62c96c..fa03a06 100644
--- a/drivers/gpu/msm2/adreno_drawctxt.c
+++ b/drivers/gpu/msm2/adreno_drawctxt.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -138,7 +138,7 @@
 		u32 timestamp, u32 type)
 {
 	struct adreno_context *drawctxt = priv;
-	wake_up_interruptible_all(&drawctxt->waiting);
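+	/* non-interruptible wake to match the wait_event() waiters */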
+	wake_up_all(&drawctxt->waiting);
 }
 
 #define adreno_wait_event_interruptible_timeout(wq, condition, timeout, io)   \
@@ -266,20 +266,18 @@
 {
 	struct adreno_context *drawctxt = priv;
 
-	wake_up_interruptible_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->waiting);
 	kgsl_context_put(&drawctxt->base);
 }
 
 static int _check_global_timestamp(struct kgsl_device *device,
-		unsigned int timestamp)
+		struct adreno_context *drawctxt, unsigned int timestamp)
 {
-	int ret;
+	/* Stop waiting if the context is invalidated */
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		return 1;
 
-	mutex_lock(&device->mutex);
-	ret = kgsl_check_timestamp(device, NULL, timestamp);
-	mutex_unlock(&device->mutex);
-
-	return ret;
+	return kgsl_check_timestamp(device, NULL, timestamp);
 }
 
 int adreno_drawctxt_wait_global(struct adreno_device *adreno_dev,
@@ -288,7 +286,7 @@
 {
 	struct kgsl_device *device = &adreno_dev->dev;
 	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
-	int ret;
+	int ret = 0;
 
 	/* Needs to hold the device mutex */
 	BUG_ON(!mutex_is_locked(&device->mutex));
@@ -298,6 +296,15 @@
 		goto done;
 	}
 
+	/*
+	 * If the context is invalid then return immediately - we may end up
+	 * waiting for a timestamp that will never come
+	 */
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+		kgsl_context_put(context);
+		goto done;
+	}
+
 	trace_adreno_drawctxt_wait_start(KGSL_MEMSTORE_GLOBAL, timestamp);
 
 	ret = kgsl_add_event(device, KGSL_MEMSTORE_GLOBAL, timestamp,
@@ -311,7 +318,7 @@
 
 	if (timeout) {
 		ret = (int) wait_event_timeout(drawctxt->waiting,
-			_check_global_timestamp(device, timestamp),
+			_check_global_timestamp(device, drawctxt, timestamp),
 			msecs_to_jiffies(timeout));
 
 		if (ret == 0)
@@ -320,7 +327,7 @@
 			ret = 0;
 	} else {
 		wait_event(drawctxt->waiting,
-			_check_global_timestamp(device, timestamp));
+			_check_global_timestamp(device, drawctxt, timestamp));
 	}
 
 	mutex_lock(&device->mutex);
@@ -384,8 +391,8 @@
 	mutex_unlock(&drawctxt->mutex);
 
 	/* Give the bad news to everybody waiting around */
-	wake_up_interruptible_all(&drawctxt->waiting);
-	wake_up_interruptible_all(&drawctxt->wq);
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
 }
 
 /**
@@ -405,6 +412,7 @@
 	int ret;
 
 	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
 	if (drawctxt == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -417,25 +425,16 @@
 	drawctxt->bin_base_offset = 0;
 	drawctxt->timestamp = 0;
 
-	*flags &= (KGSL_CONTEXT_PREAMBLE |
+	drawctxt->base.flags = *flags & (KGSL_CONTEXT_PREAMBLE |
 		KGSL_CONTEXT_NO_GMEM_ALLOC |
 		KGSL_CONTEXT_PER_CONTEXT_TS |
 		KGSL_CONTEXT_USER_GENERATED_TS |
 		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
-		KGSL_CONTEXT_TYPE_MASK);
+		KGSL_CONTEXT_TYPE_MASK |
+		KGSL_CONTEXT_PWR_CONSTRAINT);
 
 	/* Always enable per-context timestamps */
-	*flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
-	drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
-
-	if (*flags & KGSL_CONTEXT_PREAMBLE)
-		drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
-
-	if (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
-		drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
-
-	if (*flags & KGSL_CONTEXT_USER_GENERATED_TS)
-		drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
 
 	mutex_init(&drawctxt->mutex);
 	init_waitqueue_head(&drawctxt->wq);
@@ -449,18 +448,12 @@
 
 	plist_node_init(&drawctxt->pending, ADRENO_CONTEXT_DEFAULT_PRIORITY);
 
-	if (*flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
-		drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE;
-
-	drawctxt->type =
-		(*flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT;
-
 	if (adreno_dev->gpudev->ctxt_create) {
 		ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
 		if (ret)
 			goto err;
-	} else if ((*flags & KGSL_CONTEXT_PREAMBLE) == 0 ||
-		  (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+	} else if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		  (drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
 		KGSL_DEV_ERR_ONCE(device,
 				"legacy context switch not supported\n");
 		ret = -EINVAL;
@@ -476,7 +469,8 @@
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
 			0);
-
+	/* copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
 	return &drawctxt->base;
 err:
 	kgsl_context_detach(&drawctxt->base);
@@ -555,6 +549,14 @@
 	ret = adreno_drawctxt_wait_global(adreno_dev, context,
 		drawctxt->internal_timestamp, 10 * 1000);
 
+	/*
+	 * If the wait for global fails then nothing after this point is likely
+	 * to work very well - BUG_ON() so we can take advantage of the debug
+	 * tools to figure out what the h - e - double hockey sticks happened
+	 */
+
+	BUG_ON(ret);
+
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
 			drawctxt->timestamp);
@@ -570,8 +572,8 @@
 		drawctxt->ops->detach(drawctxt);
 
 	/* wake threads waiting to submit commands from this context */
-	wake_up_interruptible_all(&drawctxt->waiting);
-	wake_up_interruptible_all(&drawctxt->wq);
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
 
 	return ret;
 }
@@ -602,14 +604,14 @@
 int adreno_context_restore(struct adreno_device *adreno_dev,
 				  struct adreno_context *context)
 {
-	int ret;
 	struct kgsl_device *device;
-	unsigned int cmds[5];
+	unsigned int cmds[8];
 
 	if (adreno_dev == NULL || context == NULL)
 		return -EINVAL;
 
 	device = &adreno_dev->dev;
+
 	/* write the context identifier to the ringbuffer */
 	cmds[0] = cp_nop_packet(1);
 	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
@@ -617,14 +619,13 @@
 	cmds[3] = device->memstore.gpuaddr +
 		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
 	cmds[4] = context->base.id;
-	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
-	if (ret)
-		return ret;
-
-	return kgsl_mmu_setstate(&device->mmu,
-			context->base.proc_priv->pagetable,
-			context->base.id);
+	/* Flush the UCHE for new context */
+	cmds[5] = cp_type0_packet(
+		adreno_getreg(adreno_dev, ADRENO_REG_UCHE_INVALIDATE0), 2);
+	cmds[6] = 0;
+	cmds[7] = 0x90000000;
+	return adreno_ringbuffer_issuecmds(device, context,
+				KGSL_CMD_FLAGS_NONE, cmds, 8);
 }
 
 
@@ -697,10 +698,10 @@
 		if (flags & KGSL_CONTEXT_SAVE_GMEM)
 			/* Set the flag in context so that the save is done
 			* when this context is switched out. */
-			drawctxt->flags |= CTXT_FLAGS_GMEM_SAVE;
+			set_bit(ADRENO_CONTEXT_GMEM_SAVE, &drawctxt->priv);
 		else
 			/* Remove GMEM saving flag from the context */
-			drawctxt->flags &= ~CTXT_FLAGS_GMEM_SAVE;
+			clear_bit(ADRENO_CONTEXT_GMEM_SAVE, &drawctxt->priv);
 	}
 
 	/* already current? */
@@ -714,25 +715,16 @@
 	trace_adreno_drawctxt_switch(adreno_dev->drawctxt_active,
 		drawctxt, flags);
 
-	if (adreno_dev->drawctxt_active) {
-		ret = context_save(adreno_dev, adreno_dev->drawctxt_active);
-		if (ret) {
-			KGSL_DRV_ERR(device,
-				"Error in GPU context %d save: %d\n",
-				adreno_dev->drawctxt_active->base.id, ret);
-			return ret;
-		}
-
-		/* Put the old instance of the active drawctxt */
-		kgsl_context_put(&adreno_dev->drawctxt_active->base);
-		adreno_dev->drawctxt_active = NULL;
-	}
-
 	/* Get a refcount to the new instance */
 	if (drawctxt) {
 		if (!_kgsl_context_get(&drawctxt->base))
 			return -EINVAL;
 
+		ret = kgsl_mmu_setstate(&device->mmu,
+			drawctxt->base.proc_priv->pagetable,
+			adreno_dev->drawctxt_active ?
+			adreno_dev->drawctxt_active->base.id :
+			KGSL_CONTEXT_INVALID);
 		/* Set the new context */
 		ret = drawctxt->ops->restore(adreno_dev, drawctxt);
 		if (ret) {
@@ -750,9 +742,11 @@
 		 */
 		ret = kgsl_mmu_setstate(&device->mmu,
 					 device->mmu.defaultpagetable,
-					 KGSL_CONTEXT_INVALID);
+					adreno_dev->drawctxt_active->base.id);
 	}
-
+	/* Put the old instance of the active drawctxt */
+	if (adreno_dev->drawctxt_active)
+		kgsl_context_put(&adreno_dev->drawctxt_active->base);
 	adreno_dev->drawctxt_active = drawctxt;
 	return 0;
 }
diff --git a/drivers/gpu/msm2/adreno_drawctxt.h b/drivers/gpu/msm2/adreno_drawctxt.h
index 9312db6..97147c0 100644
--- a/drivers/gpu/msm2/adreno_drawctxt.h
+++ b/drivers/gpu/msm2/adreno_drawctxt.h
@@ -16,44 +16,6 @@
 #include "adreno_pm4types.h"
 #include "a2xx_reg.h"
 
-/* Flags */
-
-#define CTXT_FLAGS_NOT_IN_USE		0x00000000
-#define CTXT_FLAGS_IN_USE		BIT(0)
-
-/* state shadow memory allocated */
-#define CTXT_FLAGS_STATE_SHADOW		BIT(1)
-
-/* gmem shadow memory allocated */
-#define CTXT_FLAGS_GMEM_SHADOW		BIT(2)
-/* gmem must be copied to shadow */
-#define CTXT_FLAGS_GMEM_SAVE		BIT(3)
-/* gmem can be restored from shadow */
-#define CTXT_FLAGS_GMEM_RESTORE		BIT(4)
-/* preamble packed in cmdbuffer for context switching */
-#define CTXT_FLAGS_PREAMBLE		BIT(5)
-/* shader must be copied to shadow */
-#define CTXT_FLAGS_SHADER_SAVE		BIT(6)
-/* shader can be restored from shadow */
-#define CTXT_FLAGS_SHADER_RESTORE	BIT(7)
-/* Context has caused a GPU hang */
-#define CTXT_FLAGS_GPU_HANG		BIT(8)
-/* Specifies there is no need to save GMEM */
-#define CTXT_FLAGS_NOGMEMALLOC          BIT(9)
-/* Trash state for context */
-#define CTXT_FLAGS_TRASHSTATE		BIT(10)
-/* per context timestamps enabled */
-#define CTXT_FLAGS_PER_CONTEXT_TS	BIT(11)
-/* Context has caused a GPU hang and fault tolerance successful */
-#define CTXT_FLAGS_GPU_HANG_FT	BIT(12)
-/* User mode generated timestamps enabled */
-#define CTXT_FLAGS_USER_GENERATED_TS    BIT(14)
-/* Context skip till EOF */
-#define CTXT_FLAGS_SKIP_EOF             BIT(15)
-/* Context no fault tolerance */
-#define CTXT_FLAGS_NO_FAULT_TOLERANCE  BIT(16)
-/* Force the preamble for the next submission */
-#define CTXT_FLAGS_FORCE_PREAMBLE      BIT(17)
 
 /* Symbolic table for the adreno draw context type */
 #define ADRENO_DRAWCTXT_TYPES \
@@ -130,7 +92,7 @@
  * @internal_timestamp: Global timestamp of the last issued command
  *			NOTE: guarded by device->mutex, not drawctxt->mutex!
  * @state: Current state of the context
- * @flags: Bitfield controlling behavior of the context
+ * @priv: Internal flags
  * @type: Context type (GL, CL, RS)
  * @mutex: Mutex to protect the cmdqueue
  * @pagetable: Pointer to the GPU pagetable for the context
@@ -169,7 +131,7 @@
 	unsigned int timestamp;
 	unsigned int internal_timestamp;
 	int state;
-	uint32_t flags;
+	unsigned long priv;
 	unsigned int type;
 	struct mutex mutex;
 	struct kgsl_memdesc gpustate;
@@ -200,8 +162,8 @@
 
 	/* Dispatcher */
 	struct kgsl_cmdbatch *cmdqueue[ADRENO_CONTEXT_CMDQUEUE_SIZE];
-	int cmdqueue_head;
-	int cmdqueue_tail;
+	unsigned int cmdqueue_head;
+	unsigned int cmdqueue_tail;
 
 	struct plist_node pending;
 	wait_queue_head_t wq;
@@ -212,6 +174,31 @@
 	const struct adreno_context_ops *ops;
 };
 
+/**
+ * enum adreno_context_priv - Private flags for an adreno draw context
+ * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered)
+ * @ADRENO_CONTEXT_GMEM_SAVE - gmem must be copied to shadow
+ * @ADRENO_CONTEXT_GMEM_RESTORE - gmem can be restored from shadow
+ * @ADRENO_CONTEXT_SHADER_SAVE - shader must be copied to shadow
+ * @ADRENO_CONTEXT_SHADER_RESTORE - shader can be restored from shadow
+ * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang
+ * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang
+ *      and fault tolerance was successful
+ * @ADRENO_CONTEXT_SKIP_EOF - Context skips IBs until the next end of frame
+ *      marker.
+ * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission.
+ */
+enum adreno_context_priv {
+	ADRENO_CONTEXT_FAULT = 0,
+	ADRENO_CONTEXT_GMEM_SAVE,
+	ADRENO_CONTEXT_GMEM_RESTORE,
+	ADRENO_CONTEXT_SHADER_SAVE,
+	ADRENO_CONTEXT_SHADER_RESTORE,
+	ADRENO_CONTEXT_GPU_HANG,
+	ADRENO_CONTEXT_GPU_HANG_FT,
+	ADRENO_CONTEXT_SKIP_EOF,
+	ADRENO_CONTEXT_FORCE_PREAMBLE,
+};
 
 struct kgsl_context *adreno_drawctxt_create(struct kgsl_device_private *,
 			uint32_t *flags);
diff --git a/drivers/gpu/msm2/adreno_pm4types.h b/drivers/gpu/msm2/adreno_pm4types.h
index e6ec91d..37c3c50 100644
--- a/drivers/gpu/msm2/adreno_pm4types.h
+++ b/drivers/gpu/msm2/adreno_pm4types.h
@@ -143,10 +143,10 @@
 #define CP_IM_STORE            0x2c
 
 /* test 2 memory locations to dword values specified */
-#define CP_TEST_TWO_MEMS    0x71
+#define CP_TEST_TWO_MEMS	0x71
 
 /* PFP waits until the FIFO between the PFP and the ME is empty */
-#define CP_WAIT_FOR_ME      0x13
+#define CP_WAIT_FOR_ME		0x13
 
 /*
  * for a20x
@@ -164,6 +164,8 @@
 
 #define CP_SET_PROTECTED_MODE  0x5f /* sets the register protection mode */
 
+#define CP_BOOTSTRAP_UCODE  0x6f /* bootstraps microcode */
+
 /*
  * for a3xx
  */
diff --git a/drivers/gpu/msm2/adreno_postmortem.c b/drivers/gpu/msm2/adreno_postmortem.c
index 2c9b968..9e807cb 100644
--- a/drivers/gpu/msm2/adreno_postmortem.c
+++ b/drivers/gpu/msm2/adreno_postmortem.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/vmalloc.h>
+#include <mach/board.h>
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
@@ -51,6 +52,7 @@
 	{CP_DRAW_INDX,			"DRW_NDX_"},
 	{CP_DRAW_INDX_BIN,		"DRW_NDXB"},
 	{CP_EVENT_WRITE,		"EVENT_WT"},
+	{CP_MEM_WRITE,			"MEM_WRIT"},
 	{CP_IM_LOAD,			"IN__LOAD"},
 	{CP_IM_LOAD_IMMEDIATE,		"IM_LOADI"},
 	{CP_IM_STORE,			"IM_STORE"},
diff --git a/drivers/gpu/msm2/adreno_ringbuffer.c b/drivers/gpu/msm2/adreno_ringbuffer.c
index 9e69a49..6b0eb73 100644
--- a/drivers/gpu/msm2/adreno_ringbuffer.c
+++ b/drivers/gpu/msm2/adreno_ringbuffer.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -20,6 +20,7 @@
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_cffdump.h"
+#include "kgsl_trace.h"
 
 #include "adreno.h"
 #include "adreno_pm4types.h"
@@ -30,14 +31,6 @@
 
 #define GSL_RB_NOP_SIZEDWORDS				2
 
-/*
- * CP DEBUG settings for all cores:
- * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
- * PROG_END_PTR_ENABLE [25] - Allow 128 bit writes to the VBIF
- */
-
-#define CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
-
 void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
@@ -215,28 +208,19 @@
  * adreno_ringbuffer_load_pm4_ucode() - Load pm4 ucode
  * @device: Pointer to a KGSL device
  * @start: Starting index in pm4 ucode to load
+ * @end: Ending index of pm4 ucode to load
  * @addr: Address to load the pm4 ucode
  *
- * Load the pm4 ucode from @start at @addr.
+ * Load the pm4 ucode from index @start to @end at address @addr.
  */
-int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device,
-					unsigned int start, unsigned int addr)
+inline int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device,
+			unsigned int start, unsigned int end, unsigned int addr)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int i;
 
-	if (adreno_dev->pm4_fw == NULL) {
-		int ret = adreno_ringbuffer_read_pm4_ucode(device);
-		if (ret)
-			return ret;
-	}
-
-	KGSL_DRV_INFO(device, "loading pm4 ucode version: %d\n",
-		adreno_dev->pm4_fw_version);
-
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_DEBUG, CP_DEBUG_DEFAULT);
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr);
-	for (i = 1; i < adreno_dev->pm4_fw_size; i++)
+	for (i = start; i < end; i++)
 		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA,
 					adreno_dev->pm4_fw[i]);
 
@@ -278,27 +262,19 @@
  * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode
  * @device: Pointer to a KGSL device
  * @start: Starting index in pfp ucode to load
+ * @end: Ending index of pfp ucode to load
  * @addr: Address to load the pfp ucode
  *
- * Load the pfp ucode from @start at @addr.
+ * Load the pfp ucode from index @start to @end at address @addr.
  */
-int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
-					unsigned int start, unsigned int addr)
+inline int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
+			unsigned int start, unsigned int end, unsigned int addr)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int i;
 
-	if (adreno_dev->pfp_fw == NULL) {
-		int ret = adreno_ringbuffer_read_pfp_ucode(device);
-		if (ret)
-			return ret;
-	}
-
-	KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n",
-			adreno_dev->pfp_fw_version);
-
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr);
-	for (i = 1; i < adreno_dev->pfp_fw_size; i++)
+	for (i = start; i < end; i++)
 		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
 						adreno_dev->pfp_fw[i]);
 
@@ -306,72 +282,113 @@
 }
 
 /**
- * _ringbuffer_start_common() - Ringbuffer start
+ * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
+ * @rb: Pointer to adreno ringbuffer
+ * @load_jt: If non-zero, only load the jump tables
+ *
+ * Bootstrap ucode for GPU
+ * load_jt == 0, bootstrap full microcode
+ * load_jt == 1, bootstrap jump tables of microcode
+ *
+ * For example a bootstrap packet would like below
+ * Setup a type3 bootstrap packet
+ * PFP size to bootstrap
+ * PFP addr to write the PFP data
+ * PM4 size to bootstrap
+ * PM4 addr to write the PM4 data
+ * PFP dwords from microcode to bootstrap
+ * PM4 dwords from microcode to bootstrap
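+ *
+ * As a purely illustrative example: with pfp_size == 2 and pm4_size == 2,
+ * bootstrap_size would be 9, so the packet would be
+ * cp_type3_packet(CP_BOOTSTRAP_UCODE, 8) followed by 2, pfp_addr, 2,
+ * pm4_addr and the four microcode dwords.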
+ */
+static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
+					unsigned int load_jt)
+{
+	unsigned int *cmds, cmds_gpu, bootstrap_size;
+	int i = 0;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;
+
+	/* Only bootstrap jump tables of ucode */
+	if (load_jt) {
+		pm4_idx = adreno_dev->pm4_jt_idx;
+		pm4_addr = adreno_dev->pm4_jt_addr;
+		pfp_idx = adreno_dev->pfp_jt_idx;
+		pfp_addr = adreno_dev->pfp_jt_addr;
+	} else {
+		/* Bootstrap full ucode */
+		pm4_idx = 1;
+		pm4_addr = 0;
+		pfp_idx = 1;
+		pfp_addr = 0;
+	}
+
+	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
+	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);
+
+	/*
+	 * The writes below tell the PFP that 0x6f is the
+	 * opcode for bootstrapping
+	 */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200);
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005);
+
+	/* clear ME_HALT to start micro engine */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+	bootstrap_size = (pm4_size + pfp_size + 5);
+
+	cmds = adreno_ringbuffer_allocspace(rb, NULL, bootstrap_size);
+	if (cmds == NULL)
+		return -ENOMEM;
+
+	cmds_gpu = rb->buffer_desc.gpuaddr +
+			sizeof(uint) * (rb->wptr - bootstrap_size);
+	/* Construct the packet that bootstraps the ucode */
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu,
+			cp_type3_packet(CP_BOOTSTRAP_UCODE,
+			(bootstrap_size - 1)));
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pfp_size);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pfp_addr);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pm4_size);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pm4_addr);
+	for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
+		GSL_RB_WRITE(rb->device, cmds, cmds_gpu, adreno_dev->pfp_fw[i]);
+	for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
+		GSL_RB_WRITE(rb->device, cmds, cmds_gpu, adreno_dev->pm4_fw[i]);
+
+	adreno_ringbuffer_submit(rb);
+	/* idle device to validate bootstrap */
+	return adreno_idle(device);
+}
+
+/**
+ * _ringbuffer_setup_common() - Ringbuffer setup
  * @rb: Pointer to adreno ringbuffer
  *
  * Setup ringbuffer for GPU.
  */
-int _ringbuffer_start_common(struct adreno_ringbuffer *rb)
+void _ringbuffer_setup_common(struct adreno_ringbuffer *rb)
 {
-	int status;
-	union reg_cp_rb_cntl cp_rb_cntl;
-	unsigned int rb_cntl;
 	struct kgsl_device *device = rb->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	if (rb->flags & KGSL_FLAGS_STARTED)
-		return 0;
-
-	kgsl_sharedmem_set(rb->device, &rb->memptrs_desc, 0, 0,
-			   sizeof(struct kgsl_rbmemptrs));
-
 	kgsl_sharedmem_set(rb->device, &rb->buffer_desc, 0, 0xAA,
 			   (rb->sizedwords << 2));
 
-	if (adreno_is_a2xx(adreno_dev)) {
-		kgsl_regwrite(device, REG_CP_RB_WPTR_BASE,
-			(rb->memptrs_desc.gpuaddr
-			+ GSL_RB_MEMPTRS_WPTRPOLL_OFFSET));
-
-		/* setup WPTR delay */
-		kgsl_regwrite(device, REG_CP_RB_WPTR_DELAY,
-			0 /*0x70000010 */);
-	}
-
-	/*setup REG_CP_RB_CNTL */
-	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_CNTL, &rb_cntl);
-	cp_rb_cntl.val = rb_cntl;
-
 	/*
 	 * The size of the ringbuffer in the hardware is the log2
-	 * representation of the size in quadwords (sizedwords / 2)
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
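+	 * For example, assuming the default KGSL_RB_SIZE of 32 KB, the ring
+	 * is 8192 dwords, i.e. 4096 quadwords, so the size field written
+	 * below would be ilog2(4096) == 12.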
 	 */
-	cp_rb_cntl.f.rb_bufsz = ilog2(rb->sizedwords >> 1);
 
-	/*
-	 * Specify the quadwords to read before updating mem RPTR.
-	 * Like above, pass the log2 representation of the blocksize
-	 * in quadwords.
-	*/
-	cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3);
-
-	if (adreno_is_a2xx(adreno_dev)) {
-		/* WPTR polling */
-		cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN;
-	}
-
-	/* mem RPTR writebacks */
-	cp_rb_cntl.f.rb_no_update =  GSL_RB_CNTL_NO_UPDATE;
-
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL, cp_rb_cntl.val);
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
+		(ilog2(rb->sizedwords >> 1) & 0x3F) |
+		(1 << 27));
 
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
 					rb->buffer_desc.gpuaddr);
 
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR,
-				rb->memptrs_desc.gpuaddr +
-				GSL_RB_MEMPTRS_RPTR_OFFSET);
-
 	if (adreno_is_a2xx(adreno_dev)) {
 		/* explicitly clear all cp interrupts */
 		kgsl_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF);
@@ -394,6 +411,19 @@
 		kgsl_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x003E2008);
 
 	rb->wptr = 0;
+}
+
+/**
+ * _ringbuffer_start_common() - Ringbuffer start
+ * @rb: Pointer to adreno ringbuffer
+ *
+ * Start ringbuffer for GPU.
+ */
+int _ringbuffer_start_common(struct adreno_ringbuffer *rb)
+{
+	int status;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	/* clear ME_HALT to start micro engine */
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
@@ -425,39 +455,99 @@
 	struct kgsl_device *device = rb->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	/* load the CP ucode */
-	status = adreno_ringbuffer_load_pm4_ucode(device,
-			adreno_dev->pm4_jt_idx, adreno_dev->pm4_jt_addr);
-	if (status != 0)
-		return status;
+	if (rb->flags & KGSL_FLAGS_STARTED)
+		return 0;
 
-	/* load the prefetch parser ucode */
-	status = adreno_ringbuffer_load_pfp_ucode(device,
-			adreno_dev->pfp_jt_idx, adreno_dev->pfp_jt_addr);
-	if (status != 0)
-		return status;
+	_ringbuffer_setup_common(rb);
 
-	return _ringbuffer_start_common(rb);
+	/* If bootstrapping is supported, use it to load the jump tables */
+	if (adreno_bootstrap_ucode(adreno_dev)) {
+		status = _ringbuffer_bootstrap_ucode(rb, 1);
+		if (status != 0)
+			return status;
+
+	} else {
+		/* load the CP jump tables using AHB writes */
+		status = adreno_ringbuffer_load_pm4_ucode(device,
+			adreno_dev->pm4_jt_idx, adreno_dev->pm4_fw_size,
+			adreno_dev->pm4_jt_addr);
+		if (status != 0)
+			return status;
+
+		/* load the prefetch parser jump tables using AHB writes */
+		status = adreno_ringbuffer_load_pfp_ucode(device,
+			adreno_dev->pfp_jt_idx, adreno_dev->pfp_fw_size,
+			adreno_dev->pfp_jt_addr);
+		if (status != 0)
+			return status;
+	}
+
+	status = _ringbuffer_start_common(rb);
+
+	return status;
 }
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb)
+/**
+ * adreno_ringbuffer_cold_start() - Ringbuffer cold start
+ * @rb: Pointer to adreno ringbuffer
+ *
+ * Start the ringbuffer from power collapse.
+ */
+int adreno_ringbuffer_cold_start(struct adreno_ringbuffer *rb)
 {
 	int status;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	if (rb->flags & KGSL_FLAGS_STARTED)
 		return 0;
 
-	/* load the CP ucode */
-	status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, 0);
-	if (status != 0)
-		return status;
+	_ringbuffer_setup_common(rb);
 
-	/* load the prefetch parser ucode */
-	status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, 0);
-	if (status != 0)
-		return status;
+	/* If bootstrapping is supported, use it to load the ucode */
+	if (adreno_bootstrap_ucode(adreno_dev)) {
 
-	return _ringbuffer_start_common(rb);
+		/*
+		 * Load the first adreno_dev->pm4_bstrp_size +
+		 * adreno_dev->pfp_bstrp_size microcode dwords using AHB
+		 * writes. This small bootstrap microcode contains the
+		 * dispatcher + booter and teaches the CP to understand the
+		 * CP_BOOTSTRAP_UCODE packet used in
+		 * _ringbuffer_bootstrap_ucode(), which then loads the rest
+		 * of the microcode.
+		 */
+
+		status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1,
+					adreno_dev->pm4_bstrp_size+1, 0);
+		if (status != 0)
+			return status;
+
+		status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1,
+					adreno_dev->pfp_bstrp_size+1, 0);
+		if (status != 0)
+			return status;
+
+		/* Bootstrap rest of the ucode here */
+		status = _ringbuffer_bootstrap_ucode(rb, 0);
+		if (status != 0)
+			return status;
+
+	} else {
+		/* load the CP ucode using AHB writes */
+		status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1,
+					adreno_dev->pm4_fw_size, 0);
+		if (status != 0)
+			return status;
+
+		/* load the prefetch parser ucode using AHB writes */
+		status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1,
+					adreno_dev->pfp_fw_size, 0);
+		if (status != 0)
+			return status;
+	}
+
+	status = _ringbuffer_start_common(rb);
+
+	return status;
 }
 
 void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb)
@@ -497,20 +587,6 @@
 		return status;
 	}
 
-	/* allocate memory for polling and timestamps */
-	/* This really can be at 4 byte alignment boundry but for using MMU
-	 * we need to make it at page boundary */
-	status = kgsl_allocate_contiguous(&rb->memptrs_desc,
-		sizeof(struct kgsl_rbmemptrs));
-
-	if (status != 0) {
-		adreno_ringbuffer_close(rb);
-		return status;
-	}
-
-	/* overlay structure on memptrs memory */
-	rb->memptrs = (struct kgsl_rbmemptrs *) rb->memptrs_desc.hostptr;
-
 	rb->global_ts = 0;
 
 	return 0;
@@ -521,7 +597,6 @@
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
 
 	kgsl_sharedmem_free(&rb->buffer_desc);
-	kgsl_sharedmem_free(&rb->memptrs_desc);
 
 	kfree(adreno_dev->pfp_fw);
 	kfree(adreno_dev->pm4_fw);
@@ -578,7 +653,8 @@
 	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
 
 	/* Add two dwords for the CP_INTERRUPT */
-	total_sizedwords += drawctxt ? 2 : 0;
+	total_sizedwords +=
+		(drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) ?  2 : 0;
 
 	/* context rollover */
 	if (adreno_is_a3xx(adreno_dev))
@@ -594,6 +670,9 @@
 	total_sizedwords += 3; /* sop timestamp */
 	total_sizedwords += 4; /* eop timestamp */
 
+	if (adreno_is_a20x(adreno_dev))
+		total_sizedwords += 2; /* CACHE_FLUSH */
+
 	if (drawctxt) {
 		total_sizedwords += 3; /* global timestamp without cache
 					* flush for non-zero context */
@@ -607,7 +686,7 @@
 
 	/* Add space for the power on shader fixup if we need it */
 	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP)
-		total_sizedwords += 5;
+		total_sizedwords += 9;
 
 	ringcmds = adreno_ringbuffer_allocspace(rb, drawctxt, total_sizedwords);
 
@@ -629,6 +708,11 @@
 	}
 
 	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) {
+		/* Disable protected mode for the fixup */
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0);
+
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, cp_nop_packet(1));
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 				KGSL_PWRON_FIXUP_IDENTIFIER);
@@ -638,6 +722,11 @@
 			adreno_dev->pwron_fixup.gpuaddr);
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 			adreno_dev->pwron_fixup_dwords);
+
+		/* Re-enable protected mode */
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 1);
 	}
 
 	/* start-of-pipeline timestamp */
@@ -709,7 +798,8 @@
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 				eoptimestamp)));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, rb->global_ts);
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
+			rb->global_ts);
 	}
 
 	if (adreno_is_a20x(adreno_dev)) {
@@ -1014,7 +1104,7 @@
 		timestamp);
 
 	if (ret)
-		KGSL_DRV_ERR(device, "adreno_dispatcher_queue_cmd returned %d\n",
+		KGSL_DRV_ERR(device, "adreno_context_queue_cmd returned %d\n",
 				ret);
 	else {
 		/*
@@ -1033,9 +1123,89 @@
 		}
 	}
 
+	/*
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked - userspace uses this to perform post-fault activities
+	 */
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv))
+		ret = -EPROTO;
+
 	return ret;
 }
 
+unsigned int adreno_ringbuffer_get_constraint(struct kgsl_device *device,
+				struct kgsl_context *context)
+{
+	unsigned int pwrlevel = device->pwrctrl.active_pwrlevel;
+
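+	/*
+	 * Default to the current active level; a PWRLEVEL constraint
+	 * overrides it with the device max or min level below.
+	 */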
+	switch (context->pwr_constraint.type) {
+	case KGSL_CONSTRAINT_PWRLEVEL: {
+		switch (context->pwr_constraint.sub_type) {
+		case KGSL_CONSTRAINT_PWR_MAX:
+			pwrlevel = device->pwrctrl.max_pwrlevel;
+			break;
+		case KGSL_CONSTRAINT_PWR_MIN:
+			pwrlevel = device->pwrctrl.min_pwrlevel;
+			break;
+		default:
+			break;
+		}
+	}
+	break;
+
+	}
+
+	return pwrlevel;
+}
+
+void adreno_ringbuffer_set_constraint(struct kgsl_device *device,
+			struct kgsl_cmdbatch *cmdbatch)
+{
+	unsigned int constraint;
+	struct kgsl_context *context = cmdbatch->context;
+	/*
+	 * Check if the context has a constraint and constraint flags are
+	 * set.
+	 */
+	if (context->pwr_constraint.type &&
+		((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) ||
+			(cmdbatch->flags & KGSL_CONTEXT_PWR_CONSTRAINT))) {
+
+		constraint = adreno_ringbuffer_get_constraint(device, context);
+
+		/*
+		 * If a constraint is already set, set a new constraint only
+		 * if it is faster.  If the requested constraint is the same
+		 * as the current one, update ownership and timestamp.
+		 */
+		if ((device->pwrctrl.constraint.type ==
+			KGSL_CONSTRAINT_NONE) || (constraint <
+			device->pwrctrl.constraint.hint.pwrlevel.level)) {
+
+			kgsl_pwrctrl_pwrlevel_change(device, constraint);
+			device->pwrctrl.constraint.type =
+					context->pwr_constraint.type;
+			device->pwrctrl.constraint.hint.
+					pwrlevel.level = constraint;
+			device->pwrctrl.constraint.owner_id = context->id;
+			device->pwrctrl.constraint.expires = jiffies +
+					device->pwrctrl.interval_timeout;
+			/* Trace the constraint being set by the driver */
+			trace_kgsl_constraint(device,
+					device->pwrctrl.constraint.type,
+					constraint, 1);
+		} else if ((device->pwrctrl.constraint.type ==
+				context->pwr_constraint.type) &&
+			(device->pwrctrl.constraint.hint.pwrlevel.level ==
+				constraint)) {
+			device->pwrctrl.constraint.owner_id = context->id;
+			device->pwrctrl.constraint.expires = jiffies +
+					device->pwrctrl.interval_timeout;
+		}
+	}
+
+}
+
-/* adreno_rindbuffer_submitcmd - submit userspace IBs to the GPU */
+/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
 int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
 		struct kgsl_cmdbatch *cmdbatch)
@@ -1062,7 +1232,7 @@
 	commands are stored in the first node of the IB chain. We can skip that
 	if a context switch hasn't occured */
 
-	if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) &&
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
 		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
 		(adreno_dev->drawctxt_active == drawctxt))
 		start_index = 1;
@@ -1143,6 +1313,9 @@
 		test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
 		flags |= KGSL_CMD_FLAGS_PWRON_FIXUP;
 
+	/* Set the constraints before adding to ringbuffer */
+	adreno_ringbuffer_set_constraint(device, cmdbatch);
+
 	ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
 					drawctxt,
 					flags,
@@ -1164,7 +1337,7 @@
 	device->pwrctrl.irq_last = 0;
 	kgsl_trace_issueibcmds(device, context->id, cmdbatch,
 		cmdbatch->timestamp, cmdbatch->flags, ret,
-		drawctxt ? drawctxt->type : 0);
+		drawctxt->type);
 
 	kfree(link);
 	return ret;
diff --git a/drivers/gpu/msm2/adreno_ringbuffer.h b/drivers/gpu/msm2/adreno_ringbuffer.h
index 3aa0101..697e113 100644
--- a/drivers/gpu/msm2/adreno_ringbuffer.h
+++ b/drivers/gpu/msm2/adreno_ringbuffer.h
@@ -19,7 +19,6 @@
  */
 
 #define KGSL_RB_SIZE (32 * 1024)
-#define KGSL_RB_BLKSIZE 16
 
 /* CP timestamp register */
 #define	REG_CP_TIMESTAMP		 REG_SCRATCH_REG0
@@ -28,27 +27,12 @@
 struct kgsl_device;
 struct kgsl_device_private;
 
-#define GSL_RB_MEMPTRS_SCRATCH_COUNT	 8
-struct kgsl_rbmemptrs {
-	int  rptr;
-	int  wptr_poll;
-};
-
-#define GSL_RB_MEMPTRS_RPTR_OFFSET \
-	(offsetof(struct kgsl_rbmemptrs, rptr))
-
-#define GSL_RB_MEMPTRS_WPTRPOLL_OFFSET \
-	(offsetof(struct kgsl_rbmemptrs, wptr_poll))
-
 struct adreno_ringbuffer {
 	struct kgsl_device *device;
 	uint32_t flags;
 
 	struct kgsl_memdesc buffer_desc;
 
-	struct kgsl_memdesc memptrs_desc;
-	struct kgsl_rbmemptrs *memptrs;
-
 	/*ringbuffer size */
 	unsigned int sizedwords;
 
@@ -70,25 +54,6 @@
 /* enable timestamp (...scratch0) memory shadowing */
 #define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1
 
-/* mem rptr */
-#define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */
-
-/**
- * adreno_get_rptr - Get the current ringbuffer read pointer
- * @rb -  the ringbuffer
- *
- * Get the current read pointer, which is written by the GPU.
- */
-static inline unsigned int
-adreno_get_rptr(struct adreno_ringbuffer *rb)
-{
-	unsigned int result = rb->memptrs->rptr;
-	rmb();
-	return result;
-}
-
-#define GSL_RB_CNTL_POLL_EN 0x0 /* disable */
-
 /*
  * protected mode error checking below register address 0x800
  * note: if CP_INTERRUPT packet is used then checking needs
@@ -108,7 +73,7 @@
 
 int adreno_ringbuffer_warm_start(struct adreno_ringbuffer *rb);
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb);
+int adreno_ringbuffer_cold_start(struct adreno_ringbuffer *rb);
 
 void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb);
 
diff --git a/drivers/gpu/msm2/adreno_trace.h b/drivers/gpu/msm2/adreno_trace.h
index 6079b61..6ac4628 100644
--- a/drivers/gpu/msm2/adreno_trace.h
+++ b/drivers/gpu/msm2/adreno_trace.h
@@ -21,6 +21,7 @@
 #define TRACE_INCLUDE_FILE adreno_trace
 
 #include <linux/tracepoint.h>
+#include "kgsl_device.h"
 
 TRACE_EVENT(adreno_cmdbatch_queued,
 	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int queued),
@@ -264,6 +265,36 @@
 		__entry->ib2base, __entry->ib2size)
 );
 
+TRACE_EVENT(kgsl_user_pwrlevel_constraint,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type,
+		unsigned int sub_type),
+
+	TP_ARGS(device, id, type, sub_type),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, id)
+		__field(unsigned int, type)
+		__field(unsigned int, sub_type)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->id = id;
+		__entry->type = type;
+		__entry->sub_type = sub_type;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u constraint_type=%s constraint_subtype=%s",
+		__get_str(device_name), __entry->id,
+		__print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES),
+		__print_symbolic(__entry->sub_type,
+		KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES)
+	)
+);
+
 #endif /* _ADRENO_TRACE_H */
 
 /* This part must be outside protection */
diff --git a/drivers/gpu/msm2/kgsl.c b/drivers/gpu/msm2/kgsl.c
index 72888b2..efc4c69 100644
--- a/drivers/gpu/msm2/kgsl.c
+++ b/drivers/gpu/msm2/kgsl.c
@@ -20,7 +20,6 @@
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
 #include <linux/dma-buf.h>
-
 #include <linux/vmalloc.h>
 #include <linux/pm_runtime.h>
 #include <linux/genlock.h>
@@ -41,6 +40,7 @@
 #include "kgsl_device.h"
 #include "kgsl_trace.h"
 #include "kgsl_sync.h"
+#include "adreno.h"
 
 #undef MODULE_PARAM_PREFIX
 #define MODULE_PARAM_PREFIX "kgsl."
@@ -62,9 +62,6 @@
 
 static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry);
 
-static void
-kgsl_put_process_private(struct kgsl_device *device,
-			 struct kgsl_process_private *private);
 /**
  * kgsl_trace_issueibcmds() - Call trace_issueibcmds by proxy
  * device: KGSL device
@@ -344,27 +341,33 @@
  */
 static int
 kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry,
-				   struct kgsl_process_private *process)
+				   struct kgsl_device_private *dev_priv)
 {
 	int ret;
+	struct kgsl_process_private *process = dev_priv->process_priv;
+
+	ret = kgsl_process_private_get(process);
+	if (!ret)
+		return -EBADF;
 
 	while (1) {
 		if (idr_pre_get(&process->mem_idr, GFP_KERNEL) == 0) {
 			ret = -ENOMEM;
-			goto err;
+			goto err_put_proc_priv;
 		}
 
 		spin_lock(&process->mem_lock);
 		ret = idr_get_new_above(&process->mem_idr, entry, 1,
 					&entry->id);
 		spin_unlock(&process->mem_lock);
-
 		if (ret == 0)
 			break;
 		else if (ret != -EAGAIN)
-			goto err;
+			goto err_put_proc_priv;
 	}
+
 	entry->priv = process;
+	entry->dev_priv = dev_priv;
 
 	spin_lock(&process->mem_lock);
 	ret = kgsl_mem_entry_track_gpuaddr(process, entry);
@@ -372,14 +375,17 @@
 		idr_remove(&process->mem_idr, entry->id);
 	spin_unlock(&process->mem_lock);
 	if (ret)
-		goto err;
+		goto err_put_proc_priv;
 	/* map the memory after unlocking if gpuaddr has been assigned */
 	if (entry->memdesc.gpuaddr) {
 		ret = kgsl_mmu_map(process->pagetable, &entry->memdesc);
 		if (ret)
 			kgsl_mem_entry_detach_process(entry);
 	}
-err:
+	return ret;
+
+err_put_proc_priv:
+	kgsl_process_private_put(process);
 	return ret;
 }
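The retry loop above follows the two-step idr API of this kernel generation: idr_pre_get() preallocates outside the spinlock, and idr_get_new_above() can still return -EAGAIN if another thread consumed the preallocation, so the pair is retried. The idiom in isolation, as a standalone sketch:

/* Sketch of the legacy two-step idr allocation (pre-idr_alloc kernels):
 * preload with GFP_KERNEL outside the lock, allocate under the lock,
 * and loop when the preallocation was stolen by a concurrent caller. */
static int legacy_idr_alloc(struct idr *idr, spinlock_t *lock,
		void *ptr, int *id)
{
	int ret;

	do {
		if (idr_pre_get(idr, GFP_KERNEL) == 0)
			return -ENOMEM;

		spin_lock(lock);
		ret = idr_get_new_above(idr, ptr, 1, id);
		spin_unlock(lock);
	} while (ret == -EAGAIN);

	return ret;
}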
 
@@ -402,6 +408,7 @@
 
 	entry->priv->stats[entry->memtype].cur -= entry->memdesc.size;
 	spin_unlock(&entry->priv->mem_lock);
+	kgsl_process_private_put(entry->priv);
 
 	entry->priv = NULL;
 }
@@ -459,7 +466,7 @@
 	 * the context is destroyed. This will also prevent the pagetable
 	 * from being destroyed
 	 */
-	if (!kref_get_unless_zero(&dev_priv->process_priv->refcount))
+	if (!kgsl_process_private_get(dev_priv->process_priv))
 		goto fail_free_id;
 	context->device = dev_priv->device;
 	context->dev_priv = dev_priv;
@@ -506,31 +513,29 @@
  */
 int kgsl_context_detach(struct kgsl_context *context)
 {
-	struct kgsl_device *device;
 	int ret;
 
-	if (context == NULL || kgsl_context_detached(context))
+	if (context == NULL)
 		return -EINVAL;
 
-	device = context->device;
-
-	trace_kgsl_context_detach(device, context);
-
 	/*
 	 * Mark the context as detached to keep others from using
-	 * the context before it gets fully removed
+	 * the context before it gets fully removed, and to make sure
+	 * we don't try to detach twice.
 	 */
-	set_bit(KGSL_CONTEXT_DETACHED, &context->priv);
+	if (test_and_set_bit(KGSL_CONTEXT_DETACHED, &context->priv))
+		return -EINVAL;
 
-	ret = device->ftbl->drawctxt_detach(context);
+	trace_kgsl_context_detach(context->device, context);
+
+	ret = context->device->ftbl->drawctxt_detach(context);
 
 	/*
 	 * Cancel events after the device-specific context is
 	 * detached, to avoid possibly freeing memory while
 	 * it is still in use by the GPU.
 	 */
-
-	kgsl_context_cancel_events(device, context);
+	kgsl_context_cancel_events(context->device, context);
 
 	kgsl_context_put(context);
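Replacing set_bit() with test_and_set_bit() is what makes the double-detach check race-free: the read-modify-write is atomic, so exactly one caller observes the bit clear and proceeds with teardown. The idiom in general form, as a sketch:

/* Sketch: an atomic run-once guard. Only the caller that flips the
 * bit from 0 to 1 performs the teardown; everyone else backs off. */
static int teardown_once(unsigned long *flags)
{
	if (test_and_set_bit(0, flags))
		return -EINVAL;	/* teardown already in progress */

	/* ... release resources exactly once ... */
	return 0;
}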
 
@@ -550,13 +555,28 @@
 
 	write_lock(&device->context_lock);
 	if (context->id != KGSL_CONTEXT_INVALID) {
+
+		/* Clear the timestamps in the memstore during destroy */
+		kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0);
+		kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0);
+
+		/* clear device power constraint */
+		if (context->id == device->pwrctrl.constraint.owner_id) {
+			trace_kgsl_constraint(device,
+				device->pwrctrl.constraint.type,
+				device->pwrctrl.active_pwrlevel,
+				0);
+			device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE;
+		}
+
 		idr_remove(&device->context_idr, context->id);
 		context->id = KGSL_CONTEXT_INVALID;
 	}
 	write_unlock(&device->context_lock);
 	kgsl_sync_timeline_destroy(context);
-	kgsl_put_process_private(device,
-				context->proc_priv);
+	kgsl_process_private_put(context->proc_priv);
 
 	device->ftbl->drawctxt_destroy(context);
 }
@@ -821,9 +841,8 @@
 	return;
 }
 
-static void
-kgsl_put_process_private(struct kgsl_device *device,
-			 struct kgsl_process_private *private)
+void
+kgsl_process_private_put(struct kgsl_process_private *private)
 {
 	mutex_lock(&kgsl_driver.process_mutex);
 
@@ -837,7 +856,7 @@
 	return;
 }
 
-/*
+/**
  * find_process_private() - Helper function to search for process private
  * @cur_dev_priv: Pointer to device private structure which contains pointers
  * to device and process_private structs.
@@ -852,7 +871,8 @@
 	mutex_lock(&kgsl_driver.process_mutex);
 	list_for_each_entry(private, &kgsl_driver.process_list, list) {
 		if (private->pid == task_tgid_nr(current)) {
-			kref_get(&private->refcount);
+			if (!kgsl_process_private_get(private))
+				private = NULL;
 			goto done;
 		}
 	}
@@ -877,7 +897,7 @@
 	return private;
 }
 
-/*
+/**
  * kgsl_get_process_private() - Used to find the process private structure
  * @cur_dev_priv: Current device pointer
  * Finds or creates a new process private structure and initializes its members
@@ -891,13 +911,12 @@
 
 	private = kgsl_find_process_private(cur_dev_priv);
 
+	if (!private)
+		return NULL;
+
 	mutex_lock(&private->process_private_mutex);
 
-	/*
-	 * If debug root initialized then it means the rest of the fields
-	 * are also initialized
-	 */
-	if (private->debug_root)
+	if (test_bit(KGSL_PROCESS_INIT, &private->priv))
 		goto done;
 
 	private->mem_rb = RB_ROOT;
@@ -918,13 +937,15 @@
 	if (kgsl_process_init_debugfs(private))
 		goto error;
 
+	set_bit(KGSL_PROCESS_INIT, &private->priv);
+
 done:
 	mutex_unlock(&private->process_private_mutex);
 	return private;
 
 error:
 	mutex_unlock(&private->process_private_mutex);
-	kgsl_put_process_private(cur_dev_priv->device, private);
+	kgsl_process_private_put(private);
 	return NULL;
 }
 
@@ -957,6 +978,7 @@
 	struct kgsl_process_private *private = dev_priv->process_priv;
 	struct kgsl_device *device = dev_priv->device;
 	struct kgsl_context *context;
+	struct kgsl_mem_entry *entry;
 	int next = 0;
 
 	filep->private_data = NULL;
@@ -985,6 +1007,31 @@
 
 		next = next + 1;
 	}
+
+	next = 0;
+	while (1) {
+		spin_lock(&private->mem_lock);
+		entry = idr_get_next(&private->mem_idr, &next);
+		if (entry == NULL) {
+			spin_unlock(&private->mem_lock);
+			break;
+		}
+		/*
+		 * If the free pending flag is not set, userspace never freed
+		 * its reference to this entry, so drop that reference here.
+		 * Any remaining references are held inside kgsl and will be
+		 * released eventually by kgsl itself.
+		 */
+		if (entry->dev_priv == dev_priv && !entry->pending_free) {
+			entry->pending_free = 1;
+			spin_unlock(&private->mem_lock);
+			trace_kgsl_mem_free(entry);
+			kgsl_mem_entry_put(entry);
+		} else {
+			spin_unlock(&private->mem_lock);
+		}
+		next = next + 1;
+	}
 	/*
 	 * Clean up any to-be-freed entries that belong to this
 	 * process and this device. This is done after the context
@@ -995,9 +1042,10 @@
 
 	result = kgsl_close_device(device);
 	mutex_unlock(&device->mutex);
+
 	kfree(dev_priv);
 
-	kgsl_put_process_private(device, private);
+	kgsl_process_private_put(private);
 
 	pm_runtime_put(device->parentdev);
 	return result;
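The release-path sweep added above uses idr_get_next(), which returns the entry at or after the cursor; advancing the cursor by one after each hit makes the walk visit every live id exactly once. Stripped of the locking and pending-free bookkeeping, a sketch:

/* Sketch: the idr_get_next() walk in isolation. 'next' is moved past
 * each returned id so the loop terminates once the idr is exhausted. */
static void for_each_idr_entry(struct idr *idr, void (*fn)(void *))
{
	void *entry;
	int next = 0;

	while ((entry = idr_get_next(idr, &next)) != NULL) {
		fn(entry);
		next++;
	}
}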
@@ -1105,7 +1153,7 @@
 	if (device->open_count == 0) {
 		/* make sure power is on to stop the device */
 		kgsl_pwrctrl_enable(device);
-		result = device->ftbl->stop(device);
+		device->ftbl->stop(device);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
 		atomic_dec(&device->active_cnt);
 	}
@@ -1273,10 +1321,12 @@
 static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry)
 {
 	bool ret = false;
+
+	if (entry == NULL)
+		return false;
+
 	spin_lock(&entry->priv->mem_lock);
-	if (entry && entry->pending_free) {
-		ret = false;
-	} else if (entry) {
+	if (!entry->pending_free) {
 		entry->pending_free = 1;
 		ret = true;
 	}
@@ -1331,6 +1381,7 @@
 			result = -EINVAL;
 			break;
 		}
+
 		/*
 		 * Copy the reset status to value which also serves as
 		 * the out parameter
@@ -1365,8 +1416,8 @@
 
 	if (dev_priv->device->ftbl->setproperty)
 		result = dev_priv->device->ftbl->setproperty(
-			dev_priv->device, param->type,
-			param->value, param->sizebytes);
+			dev_priv, param->type, param->value,
+			param->sizebytes);
 
 	return result;
 }
@@ -1899,7 +1950,8 @@
 
 /**
  * _kgsl_cmdbatch_verify() - Perform a quick sanity check on a command batch
- * @device: Pointer to a KGSL device that owns the command batch
+ * @device: Pointer to a KGSL instance that owns the command batch
+ * @pagetable: Pointer to the pagetable for the current process
  * @cmdbatch: Pointer to the command batch to verify
  *
  * Do a quick sanity test on the list of indirect buffers in a command batch
@@ -1909,7 +1961,6 @@
 	struct kgsl_cmdbatch *cmdbatch)
 {
 	int i;
-
 	struct kgsl_process_private *private = dev_priv->process_priv;
 
 	for (i = 0; i < cmdbatch->ibcount; i++) {
@@ -2074,7 +2125,11 @@
 		cmdbatch, &param->timestamp);
 
 free_cmdbatch:
-	if (result)
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
 		kgsl_cmdbatch_destroy(cmdbatch);
 
 done:
@@ -2122,7 +2177,11 @@
 		cmdbatch, &param->timestamp);
 
 free_cmdbatch:
-	if (result)
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
 		kgsl_cmdbatch_destroy(cmdbatch);
 
 done:
@@ -2331,6 +2390,11 @@
 
 	trace_kgsl_mem_free(entry);
 
+	kgsl_memfree_hist_set_event(entry->priv->pid,
+				    entry->memdesc.gpuaddr,
+				    entry->memdesc.size,
+				    entry->memdesc.flags);
+
 	/*
 	 * First kgsl_mem_entry_put is for the reference that we took in
 	 * this function when calling kgsl_sharedmem_find_id, second one is
@@ -2413,10 +2477,8 @@
 
 	ret = -ERANGE;
 
-	if (phys == 0) {
-		KGSL_CORE_ERR("kgsl_get_phys_file returned phys=0\n");
+	if (phys == 0)
 		goto err;
-	}
 
 	/* Make sure the length of the region, the offset and the desired
 	 * size are all page aligned or bail
@@ -2424,19 +2486,13 @@
 	if ((len & ~PAGE_MASK) ||
 		(offset & ~PAGE_MASK) ||
 		(size & ~PAGE_MASK)) {
-		KGSL_CORE_ERR("length %lu, offset %u or size %u "
-				"is not page aligned\n",
-				len, offset, size);
+		KGSL_CORE_ERR("length offset or size is not page aligned\n");
 		goto err;
 	}
 
 	/* The size or offset can never be greater than the PMEM length */
-	if (offset >= len || size > len) {
-		KGSL_CORE_ERR("offset %u or size %u "
-				"exceeds pmem length %lu\n",
-				offset, size, len);
+	if (offset >= len || size > len)
 		goto err;
-	}
 
 	/* If size is 0, then adjust it to default to the size of the region
 	 * minus the offset.  If size isn't zero, then make sure that it will
@@ -2753,7 +2809,7 @@
 			| KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	entry->memdesc.flags = param->flags;
-	if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
+	if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu))
 		entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU)
@@ -2826,7 +2882,7 @@
 	/* echo back flags */
 	param->flags = entry->memdesc.flags;
 
-	result = kgsl_mem_entry_attach_process(entry, private);
+	result = kgsl_mem_entry_attach_process(entry, dev_priv);
 	if (result)
 		goto error_attach;
 
@@ -2856,6 +2912,9 @@
 		break;
 	}
 error:
+	/* Clear gpuaddr here so userspace doesn't get any wrong ideas */
+	param->gpuaddr = 0;
+
 	kfree(entry);
 	return result;
 }
@@ -2950,7 +3009,7 @@
 	bool full_flush = false;
 
 	if (param->id_list == NULL || param->count == 0
-			|| param->count > (UINT_MAX/sizeof(unsigned int)))
+			|| param->count > (PAGE_SIZE / sizeof(unsigned int)))
 		return -EINVAL;
 
 	id_list = kzalloc(param->count * sizeof(unsigned int), GFP_KERNEL);
@@ -3072,7 +3131,8 @@
 
 	align = (flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
 	if (align >= 32) {
-		KGSL_CORE_ERR("Alignment too big, restricting to 2^32\n");
+		KGSL_CORE_ERR("Alignment too big, restricting to 2^31\n");
+
 		flags &= ~KGSL_MEMALIGN_MASK;
 		flags |= (31 << KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK;
 	}
@@ -3084,8 +3144,8 @@
 	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU)
 		entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE;
 
-	result = kgsl_allocate_user(&entry->memdesc, private->pagetable, size,
-				    flags);
+	result = kgsl_allocate_user(dev_priv->device, &entry->memdesc,
+				private->pagetable, size, flags);
 	if (result != 0)
 		goto err;
 
@@ -3113,7 +3173,7 @@
 	if (result)
 		return result;
 
-	result = kgsl_mem_entry_attach_process(entry, private);
+	result = kgsl_mem_entry_attach_process(entry, dev_priv);
 	if (result != 0)
 		goto err;
 
@@ -3135,18 +3195,19 @@
 			unsigned int cmd, void *data)
 {
 	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_device *device = dev_priv->device;
 	struct kgsl_gpumem_alloc_id *param = data;
 	struct kgsl_mem_entry *entry = NULL;
 	int result;
 
-	if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
+	if (!kgsl_mmu_use_cpu_map(&device->mmu))
 		param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
 	if (result != 0)
 		goto err;
 
-	result = kgsl_mem_entry_attach_process(entry, private);
+	result = kgsl_mem_entry_attach_process(entry, dev_priv);
 	if (result != 0)
 		goto err;
 
@@ -3243,11 +3304,12 @@
 };
 
 /**
- * kgsl_genlock_event_cb - Event callback for a genlock timestamp event
- * @device - The KGSL device that expired the timestamp
- * @priv - private data for the event
- * @context_id - the context id that goes with the timestamp
- * @timestamp - the timestamp that triggered the event
+ * kgsl_genlock_event_cb() - Event callback for a genlock timestamp event
+ * @device: The KGSL device that expired the timestamp
+ * @priv: private data for the event
+ * @context_id: the context id that goes with the timestamp
+ * @timestamp: the timestamp that triggered the event
+ * @type: Type of event that signaled the callback
  *
  * Release a genlock lock following the expiration of a timestamp
  */
@@ -3368,7 +3430,7 @@
 static const struct {
 	unsigned int cmd;
 	kgsl_ioctl_func_t func;
-	int flags;
+	unsigned int flags;
 } kgsl_ioctl_funcs[] = {
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
 			kgsl_ioctl_device_getproperty,
@@ -3647,7 +3709,7 @@
 static inline bool
 mmap_range_valid(unsigned long addr, unsigned long len)
 {
-	return (addr + len) > addr && (addr + len) < TASK_SIZE;
+	return ((ULONG_MAX - addr) > len) && ((addr + len) < TASK_SIZE);
 }
 
 static unsigned long
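The rewritten test never forms addr + len until it has proven the sum cannot wrap: it first checks that the headroom above addr exceeds len. With addr = ULONG_MAX - 1 and len = 4, the old expression computed a wrapped sum before rejecting it; the new form rejects the pair immediately. A standalone userspace self-test sketch (TASK_SIZE is faked, since it is architecture-specific):

#include <assert.h>
#include <limits.h>
#include <stdbool.h>

#define FAKE_TASK_SIZE 0xc0000000UL	/* stand-in for TASK_SIZE */

static bool range_valid(unsigned long addr, unsigned long len)
{
	return ((ULONG_MAX - addr) > len) && ((addr + len) < FAKE_TASK_SIZE);
}

int main(void)
{
	assert(range_valid(0x10000, 0x1000));	      /* ordinary mapping */
	assert(!range_valid(ULONG_MAX - 1, 4));	      /* would wrap */
	assert(!range_valid(FAKE_TASK_SIZE, 0x1000)); /* beyond user VA */
	return 0;
}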
@@ -3804,7 +3866,7 @@
 	if (ret)
 		return ret;
 
-	vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc);
+	vma->vm_flags |= entry->memdesc.ops->vmflags;
 
 	vma->vm_private_data = entry;
 
@@ -4131,7 +4193,7 @@
 			pwr->power_flags, pwr->active_pwrlevel);
 
 		KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
-				  pwr->interval_timeout);
+				pwr->interval_timeout);
 
 	}
 
diff --git a/drivers/gpu/msm2/kgsl.h b/drivers/gpu/msm2/kgsl.h
index 32f105c..d22c4f6 100644
--- a/drivers/gpu/msm2/kgsl.h
+++ b/drivers/gpu/msm2/kgsl.h
@@ -146,7 +146,7 @@
 struct kgsl_cmdbatch;
 
 struct kgsl_memdesc_ops {
-	int (*vmflags)(struct kgsl_memdesc *);
+	unsigned int vmflags;
 	int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *,
 		       struct vm_fault *);
 	void (*free)(struct kgsl_memdesc *memdesc);
@@ -178,6 +178,7 @@
 	unsigned int sglen_alloc;  /* Allocated entries in the sglist */
 	struct kgsl_memdesc_ops *ops;
 	unsigned int flags; /* Flags set from userspace */
+	struct device *dev;
 };
 
 /* List of different memory entry types */
@@ -202,6 +203,7 @@
 	struct kgsl_process_private *priv;
 	/* Initialized to 0, set to 1 when entry is marked for freeing */
 	int pending_free;
+	struct kgsl_device_private *dev_priv;
 };
 
 #ifdef CONFIG_MSM_KGSL_MMU_PAGE_FAULT
@@ -271,7 +273,7 @@
 		size = 1;
 
 	/* don't overflow */
-	if ((gpuaddr + size) < gpuaddr)
+	if (size > UINT_MAX - gpuaddr)
 		return 0;
 
 	if (gpuaddr >= memdesc->gpuaddr &&
diff --git a/drivers/gpu/msm2/kgsl_debugfs.c b/drivers/gpu/msm2/kgsl_debugfs.c
index d62a222..7b8e237 100644
--- a/drivers/gpu/msm2/kgsl_debugfs.c
+++ b/drivers/gpu/msm2/kgsl_debugfs.c
@@ -247,7 +247,7 @@
 
 static void print_mem_entry(struct seq_file *s, struct kgsl_mem_entry *entry)
 {
-	char flags[6];
+	char flags[7];
 	char usage[16];
 	struct kgsl_memdesc *m = &entry->memdesc;
 
@@ -256,13 +256,16 @@
 	flags[2] = get_alignflag(m);
 	flags[3] = get_cacheflag(m);
 	flags[4] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-';
-	flags[5] = '\0';
+	flags[5] = (m->useraddr) ? 'Y' : 'N';
+	flags[6] = '\0';
 
 	kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
 
-	seq_printf(s, "%08x %08lx %8d %5d %5s %10s %16s %5d\n",
-			m->gpuaddr, m->useraddr, m->size, entry->id, flags,
-			memtype_str(entry->memtype), usage, m->sglen);
+	seq_printf(s, "%pK %pK %8zd %5d %6s %10s %16s %5d\n",
+			   (unsigned long *)(uintptr_t) m->gpuaddr,
+			   (unsigned long *) m->useraddr,
+				m->size, entry->id, flags,
+				memtype_str(entry->memtype), usage, m->sglen);
 }
 
 static int process_mem_print(struct seq_file *s, void *unused)
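Two things change together here: the file mode is relaxed to world-readable later in this file, and the raw %08x address formats become %pK. %pK consults /proc/sys/kernel/kptr_restrict, so unprivileged readers see censored addresses while privileged ones still get real values. In miniature:

/* Sketch: %pK is the kptr_restrict-aware pointer format. Depending on
 * the sysctl and the reader's credentials it prints the real address
 * or a zeroed one, unlike a raw %08x/%lx cast. */
static void show_entry(struct seq_file *s, unsigned long gpuaddr)
{
	seq_printf(s, "gpuaddr=%pK\n", (void *)gpuaddr);
}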
@@ -272,7 +275,7 @@
 	struct kgsl_process_private *private = s->private;
 	int next = 0;
 
-	seq_printf(s, "%8s %8s %8s %5s %5s %10s %16s %5s\n",
+	seq_printf(s, "%8s %8s %8s %5s %6s %10s %16s %5s\n",
 		   "gpuaddr", "useraddr", "size", "id", "flags", "type",
 		   "usage", "sglen");
 
@@ -301,14 +304,36 @@
 
 static int process_mem_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, process_mem_print, inode->i_private);
+	struct kgsl_process_private *private = inode->i_private;
+
+	/*
+	 * Hold a reference on the process while the file is open, in
+	 * case the process exits in the meantime. If the process is
+	 * already dying we cannot take a refcount, so print nothing.
+	 */
+
+	if (!private || !kgsl_process_private_get(private))
+		return -ENODEV;
+
+	return single_open(file, process_mem_print, private);
+}
+
+static int process_mem_release(struct inode *inode, struct file *file)
+{
+	struct kgsl_process_private *private = inode->i_private;
+
+	if (private)
+		kgsl_process_private_put(private);
+
+	return single_release(inode, file);
 }
 
 static const struct file_operations process_mem_fops = {
 	.open = process_mem_open,
 	.read = seq_read,
 	.llseek = seq_lseek,
-	.release = single_release,
+	.release = process_mem_release,
 };
 
 
@@ -350,7 +375,7 @@
 	 * So if debugfs is disabled in kernel, return as
 	 * success.
 	 */
-	dentry = debugfs_create_file("mem", 0400, private->debug_root, private,
+	dentry = debugfs_create_file("mem", 0444, private->debug_root, private,
 			    &process_mem_fops);
 
 	if (IS_ERR(dentry)) {
diff --git a/drivers/gpu/msm2/kgsl_debugfs.h b/drivers/gpu/msm2/kgsl_debugfs.h
index b2f137c..fe9bc76 100644
--- a/drivers/gpu/msm2/kgsl_debugfs.h
+++ b/drivers/gpu/msm2/kgsl_debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2011,2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -35,7 +35,7 @@
 static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { }
 static inline void kgsl_core_debugfs_close(void) { }
 static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; }
-static inline int kgsl_process_init_debugfs(struct kgsl_process_private *)
+static inline int kgsl_process_init_debugfs(struct kgsl_process_private *priv)
 {
 	return 0;
 }
diff --git a/drivers/gpu/msm2/kgsl_device.h b/drivers/gpu/msm2/kgsl_device.h
index 44324c6..b78f275 100644
--- a/drivers/gpu/msm2/kgsl_device.h
+++ b/drivers/gpu/msm2/kgsl_device.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -127,8 +127,8 @@
 	void (*drawctxt_destroy) (struct kgsl_context *context);
 	long (*ioctl) (struct kgsl_device_private *dev_priv,
 		unsigned int cmd, void *data);
-	int (*setproperty) (struct kgsl_device *device,
-		enum kgsl_property_type type, void *value,
+	int (*setproperty) (struct kgsl_device_private *dev_priv,
+		enum kgsl_property_type type, void __user *value,
 		unsigned int sizebytes);
 	int (*postmortem_dump) (struct kgsl_device *device, int manual);
 	void (*drawctxt_sched)(struct kgsl_device *device,
@@ -311,7 +311,6 @@
 };
 
 void kgsl_process_events(struct work_struct *work);
-void kgsl_check_fences(struct work_struct *work);
 
 #define KGSL_DEVICE_COMMON_INIT(_dev) \
 	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
@@ -359,6 +358,10 @@
  * @pagefault: flag set if this context caused a pagefault.
  * @pagefault_ts: global timestamp of the pagefault, if KGSL_CONTEXT_PAGEFAULT
  * is set.
+ * @flags: flags from userspace controlling the behavior of this context
+ * @fault_count: number of times the gpu hung in the last _context_throttle_time ms
+ * @fault_time: time of the first gpu hang in last _context_throttle_time ms
+ * @pwr_constraint: power constraint from userspace for this context
  */
 struct kgsl_context {
 	struct kref refcount;
@@ -374,10 +377,29 @@
 	struct list_head events;
 	struct list_head events_list;
 	unsigned int pagefault_ts;
+	unsigned int flags;
+	unsigned int fault_count;
+	unsigned long fault_time;
+	struct kgsl_pwr_constraint pwr_constraint;
 };
 
+/**
+ * struct kgsl_process_private -  Private structure for a KGSL process (across
+ * all devices)
+ * @priv: Internal flags, use KGSL_PROCESS_* values
+ * @pid: ID for the task owner of the process
+ * @mem_lock: Spinlock to protect the process memory lists
+ * @refcount: kref object for reference counting the process
+ * @process_private_mutex: Mutex to synchronize access to the process struct
+ * @mem_rb: Root of the RB tree of memory owned by this process
+ * @idr: ID allocator used to assign IDs to memory allocations
+ * @pagetable: Pointer to the pagetable owned by this process
+ * @kobj: Pointer to a kobj for the sysfs directory for this process
+ * @debug_root: Pointer to the debugfs root for this process
+ * @stats: Memory allocation statistics for this process
+ */
 struct kgsl_process_private {
-	unsigned int refcnt;
+	unsigned long priv;
 	pid_t pid;
 	spinlock_t mem_lock;
 
@@ -399,6 +421,14 @@
 	} stats[KGSL_MEM_ENTRY_MAX];
 };
 
+/**
+ * enum kgsl_process_priv_flags - Private flags for kgsl_process_private
+ * @KGSL_PROCESS_INIT: Set if the process structure has been set up
+ */
+enum kgsl_process_priv_flags {
+	KGSL_PROCESS_INIT = 0,
+};
+
 struct kgsl_device_private {
 	struct kgsl_device *device;
 	struct kgsl_process_private *process_priv;
@@ -631,7 +661,7 @@
  * Find the context associated with the given ID number, increase the reference
  * count on it and return it.  The caller must make sure that this call is
  * paired with a kgsl_context_put. This function validates that the context id
- * given is owned by the dev_priv instancet that is passed in.  see
+ * given is owned by the dev_priv instance that is passed in.  See
  * kgsl_context_get for the internal version that doesn't do the check
  */
 static inline struct kgsl_context *kgsl_context_get_owner(
@@ -682,6 +712,24 @@
 void kgsl_cmdbatch_destroy_object(struct kref *kref);
 
 /**
+ * kgsl_process_private_get() - increment the refcount on a
+ * kgsl_process_private struct
+ * @process: Pointer to the KGSL process_private
+ *
+ * Returns 0 if the structure is invalid and a reference count could not be
+ * obtained, nonzero otherwise.
+ */
+static inline int kgsl_process_private_get(struct kgsl_process_private *process)
+{
+	int ret = 0;
+	if (process != NULL)
+		ret = kref_get_unless_zero(&process->refcount);
+	return ret;
+}
+
+void kgsl_process_private_put(struct kgsl_process_private *private);
+
+/**
  * kgsl_cmdbatch_put() - Decrement the refcount for a command batch object
  * @cmdbatch: Pointer to the command batch object
  */
@@ -700,7 +748,16 @@
  */
 static inline int kgsl_cmdbatch_sync_pending(struct kgsl_cmdbatch *cmdbatch)
 {
-	return list_empty(&cmdbatch->synclist) ? 0 : 1;
+	int ret;
+
+	if (cmdbatch == NULL)
+		return 0;
+
+	spin_lock(&cmdbatch->lock);
+	ret = list_empty(&cmdbatch->synclist) ? 0 : 1;
+	spin_unlock(&cmdbatch->lock);
+
+	return ret;
 }
 
 #if defined(CONFIG_GPU_TRACEPOINTS)
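kgsl_cmdbatch_sync_pending() now samples the sync list under cmdbatch->lock, since an unlocked list_empty() can race with a concurrent list_add()/list_del() and return a stale answer. The general locked-predicate idiom, sketched:

/* Sketch: sample a list predicate under its lock so the snapshot is
 * coherent with respect to concurrent add/del on the same list. */
static bool work_pending(struct list_head *head, spinlock_t *lock)
{
	bool pending;

	spin_lock(lock);
	pending = !list_empty(head);
	spin_unlock(lock);

	return pending;
}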
diff --git a/drivers/gpu/msm2/kgsl_drm.c b/drivers/gpu/msm2/kgsl_drm.c
index 6402bf4..1fc7467 100644
--- a/drivers/gpu/msm2/kgsl_drm.c
+++ b/drivers/gpu/msm2/kgsl_drm.c
@@ -224,11 +224,23 @@
 				return result;
 			}
 
-			result = kgsl_mmu_map(priv->pagetable, &priv->memdesc,
-					GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+			result = kgsl_mmu_get_gpuaddr(priv->pagetable,
+							&priv->memdesc);
+			if (result) {
+				DRM_ERROR(
+				"kgsl_mmu_get_gpuaddr failed. result = %d\n",
+				result);
+				ion_free(kgsl_drm_ion_client,
+					priv->ion_handle);
+				priv->ion_handle = NULL;
+				return result;
+			}
+			result = kgsl_mmu_map(priv->pagetable, &priv->memdesc);
 			if (result) {
 				DRM_ERROR(
 				"kgsl_mmu_map failed.  result = %d\n", result);
+				kgsl_mmu_put_gpuaddr(priv->pagetable,
+							&priv->memdesc);
 				ion_free(kgsl_drm_ion_client,
 					priv->ion_handle);
 				priv->ion_handle = NULL;
@@ -274,10 +286,17 @@
 			priv->memdesc.sglen++;
 		}
 
+		result = kgsl_mmu_get_gpuaddr(priv->pagetable, &priv->memdesc);
+		if (result) {
+			DRM_ERROR(
+			"kgsl_mmu_get_gpuaddr failed.  result = %d\n", result);
+			goto memerr;
+		}
 		result = kgsl_mmu_map(priv->pagetable, &priv->memdesc);
 		if (result) {
 			DRM_ERROR(
 			"kgsl_mmu_map failed.  result = %d\n", result);
+			kgsl_mmu_put_gpuaddr(priv->pagetable, &priv->memdesc);
 			goto memerr;
 		}
 
@@ -312,8 +331,10 @@
 	if (!kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type))
 		return;
 
-	if (priv->memdesc.gpuaddr)
+	if (priv->memdesc.gpuaddr) {
 		kgsl_mmu_unmap(priv->memdesc.pagetable, &priv->memdesc);
+		kgsl_mmu_put_gpuaddr(priv->memdesc.pagetable, &priv->memdesc);
+	}
 
 	/* ION will take care of freeing the sg table. */
 	priv->memdesc.sg = NULL;
@@ -646,9 +667,21 @@
 		priv->memdesc.sglen++;
 	}
 
+	ret = kgsl_mmu_get_gpuaddr(priv->pagetable, &priv->memdesc);
+	if (ret) {
+		DRM_ERROR("kgsl_mmu_get_gpuaddr failed.  ret = %d\n", ret);
+		ion_free(kgsl_drm_ion_client,
+			priv->ion_handle);
+		priv->ion_handle = NULL;
+		kgsl_mmu_putpagetable(priv->pagetable);
+		drm_gem_object_release(obj);
+		kfree(priv);
+		return -ENOMEM;
+	}
 	ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc);
 	if (ret) {
 		DRM_ERROR("kgsl_mmu_map failed.  ret = %d\n", ret);
+		kgsl_mmu_put_gpuaddr(priv->pagetable, &priv->memdesc);
 		ion_free(kgsl_drm_ion_client,
 			priv->ion_handle);
 		priv->ion_handle = NULL;
diff --git a/drivers/gpu/msm2/kgsl_events.c b/drivers/gpu/msm2/kgsl_events.c
index d76b628..342d76c 100644
--- a/drivers/gpu/msm2/kgsl_events.c
+++ b/drivers/gpu/msm2/kgsl_events.c
@@ -211,10 +211,8 @@
 	kgsl_event_func func, void *priv, void *owner)
 {
 	struct kgsl_event *event;
-	unsigned int cur_ts;
+	unsigned int queued = 0, cur_ts;
 	struct kgsl_context *context = NULL;
-	struct adreno_context *drawctxt;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	BUG_ON(!mutex_is_locked(&device->mutex));
 
@@ -225,16 +223,23 @@
 		context = kgsl_context_get(device, id);
 		if (context == NULL)
 			return -EINVAL;
-		/* Do not allow registering of event with invalid timestamp */
-		drawctxt = ADRENO_CONTEXT(context);
-		if (timestamp_cmp(ts, drawctxt->timestamp) > 0) {
+	}
+	/*
+	 * If the caller is creating their own timestamps, let them schedule
+	 * events in the future. Otherwise only allow timestamps that have been
+	 * queued.
+	 */
+	if (context == NULL ||
+		((context->flags & KGSL_CONTEXT_USER_GENERATED_TS) == 0)) {
+		queued = kgsl_readtimestamp(device, context,
+			KGSL_TIMESTAMP_QUEUED);
+
+		if (timestamp_cmp(ts, queued) > 0) {
 			kgsl_context_put(context);
 			return -EINVAL;
 		}
-	} else {
-		if (timestamp_cmp(ts, adreno_dev->ringbuffer.global_ts) > 0)
-			return -EINVAL;
 	}
+
 	cur_ts = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
 
 	/*
@@ -333,7 +338,11 @@
 		void *priv)
 {
 	struct kgsl_event *event;
-	struct list_head *head = _get_list_head(device, context);
+	struct list_head *head;
+
+	BUG_ON(!mutex_is_locked(&device->mutex));
+
+	head = _get_list_head(device, context);
 
 	event = _find_event(device, head, timestamp, func, priv);
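Both the old and the new checks lean on timestamp_cmp(), because GPU timestamps are free-running 32-bit counters that wrap; an ordering test therefore has to be wraparound-aware, in the spirit of the kernel's time_after(). A sketch of that comparison style (kgsl's actual helper uses a rollover window, so this is the idea rather than the exact code):

/* Sketch: wraparound-tolerant ordering for 32-bit timestamps, valid
 * while the two values are within 2^31 of each other. Positive means
 * 'a' is newer than 'b', negative older, zero equal. */
static inline int ts_cmp_sketch(unsigned int a, unsigned int b)
{
	return (int)(a - b);
}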
 
diff --git a/drivers/gpu/msm2/kgsl_gpummu.c b/drivers/gpu/msm2/kgsl_gpummu.c
index 1a1e2e3..2634e4f 100644
--- a/drivers/gpu/msm2/kgsl_gpummu.c
+++ b/drivers/gpu/msm2/kgsl_gpummu.c
@@ -23,6 +23,7 @@
 #include "kgsl_device.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_trace.h"
+#include "adreno.h"
 
 #define KGSL_PAGETABLE_SIZE \
 	ALIGN(KGSL_PAGETABLE_ENTRIES(CONFIG_MSM_KGSL_PAGE_TABLE_SIZE) * \
@@ -403,11 +404,22 @@
 {
 	unsigned int reg;
 	unsigned int ptbase;
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	unsigned int no_page_fault_log = 0;
 
-	kgsl_regread(mmu->device, MH_MMU_PAGE_FAULT, &reg);
-	kgsl_regread(mmu->device, MH_MMU_PT_BASE, &ptbase);
+	device = mmu->device;
+	adreno_dev = ADRENO_DEVICE(device);
 
-	KGSL_MEM_CRIT(mmu->device,
+	kgsl_regread(device, MH_MMU_PAGE_FAULT, &reg);
+	kgsl_regread(device, MH_MMU_PT_BASE, &ptbase);
+
+
+	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
+		no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, reg);
+
+	if (!no_page_fault_log)
+		KGSL_MEM_CRIT(mmu->device,
 			"mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n",
 			reg & ~(PAGE_SIZE - 1),
 			kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
diff --git a/drivers/gpu/msm2/kgsl_iommu.c b/drivers/gpu/msm2/kgsl_iommu.c
index 976d4a8..2d5c0b8 100644
--- a/drivers/gpu/msm2/kgsl_iommu.c
+++ b/drivers/gpu/msm2/kgsl_iommu.c
@@ -47,7 +47,7 @@
 	{ 0x03C, 1 },			/* TLBLKCR */
 	{ 0x818, 1 },			/* V2PUR */
 	{ 0x2C, 1 },			/* FSYNR0 */
-	{ 0x30, 1 },			/* FSYNR0 */
+	{ 0x30, 1 },			/* FSYNR1 */
 	{ 0, 0 },			/* TLBSYNC, not in v0 */
 	{ 0, 0 },			/* TLBSTATUS, not in v0 */
 	{ 0, 0 }			/* IMPLDEF_MICRO_MMU_CRTL, not in v0 */
@@ -71,6 +71,11 @@
 };
 
 /* naming mismatch with iommu things */
+static int kgsl_iommu_default_setstate(struct kgsl_mmu *mmu,
+		uint32_t flags);
+static phys_addr_t
+kgsl_iommu_get_current_ptbase(struct kgsl_mmu *mmu);
+
 static void _iommu_lock(void)
 {
 	return;
@@ -317,13 +322,13 @@
 	struct kgsl_iommu_unit *iommu_unit;
 	struct kgsl_iommu_device *iommu_dev;
 	unsigned int ptbase, fsr;
+	unsigned int pid;
+	struct _mem_entry prev, next;
+	unsigned int fsynr0, fsynr1;
+	int write;
 	struct kgsl_device *device;
 	struct adreno_device *adreno_dev;
 	unsigned int no_page_fault_log = 0;
-	unsigned int pid;
-	unsigned int fsynr0, fsynr1;
-	int write;
-	struct _mem_entry prev, next;
 	unsigned int curr_context_id = 0;
 	unsigned int curr_global_ts = 0;
 	struct kgsl_context *context;
@@ -427,6 +432,7 @@
 			_print_entry(iommu_dev->kgsldev, &next);
 		else
 			KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n");
+
 	}
 
 	trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
@@ -448,30 +454,29 @@
 /*
  * kgsl_iommu_disable_clk - Disable iommu clocks
  * @mmu - Pointer to mmu structure
+ * @unit - Iommu unit
  *
- * Disables iommu clocks
+ * Disables iommu clocks for an iommu unit
  * Return - void
  */
-static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu)
+static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu, int unit)
 {
 	struct kgsl_iommu *iommu = mmu->priv;
-	struct msm_iommu_drvdata *iommu_drvdata;
 	int i, j;
 
 	for (i = 0; i < iommu->unit_count; i++) {
 		struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
-		for (j = 0; j < iommu_unit->dev_count; j++) {
-			if (!iommu_unit->dev[j].clk_enabled)
-				continue;
-			iommu_drvdata = dev_get_drvdata(
-					iommu_unit->dev[j].dev->parent);
-			if (iommu_drvdata->aclk)
-				clk_disable_unprepare(iommu_drvdata->aclk);
-			if (iommu_drvdata->clk)
-				clk_disable_unprepare(iommu_drvdata->clk);
-			clk_disable_unprepare(iommu_drvdata->pclk);
-			iommu_unit->dev[j].clk_enabled = false;
-		}
+
+		/* Turn off the clks for the requested IOMMU unit */
+		if ((unit != i) && (unit != KGSL_IOMMU_MAX_UNITS))
+			continue;
+
+		atomic_dec(&iommu_unit->clk_enable_count);
+		BUG_ON(atomic_read(&iommu_unit->clk_enable_count) < 0);
+
+		for (j = (KGSL_IOMMU_MAX_CLKS - 1); j >= 0; j--)
+			if (iommu_unit->clks[j])
+				clk_disable_unprepare(iommu_unit->clks[j]);
 	}
 }
 
@@ -491,32 +496,12 @@
 					unsigned int id, unsigned int ts,
 					u32 type)
 {
-	struct kgsl_mmu *mmu = data;
-	struct kgsl_iommu *iommu = mmu->priv;
+	struct kgsl_iommu_disable_clk_param *param = data;
 
-	if (!iommu->clk_event_queued) {
-		if (0 > timestamp_cmp(ts, iommu->iommu_last_cmd_ts))
-			KGSL_DRV_ERR(device,
-			"IOMMU disable clock event being cancelled, "
-			"iommu_last_cmd_ts: %x, retired ts: %x\n",
-			iommu->iommu_last_cmd_ts, ts);
-		return;
-	}
+	kgsl_iommu_disable_clk(param->mmu, param->unit);
 
-	if (0 <= timestamp_cmp(ts, iommu->iommu_last_cmd_ts)) {
-		kgsl_iommu_disable_clk(mmu);
-		iommu->clk_event_queued = false;
-	} else {
-		/* add new event to fire when ts is reached, this can happen
-		 * if we queued an event and someone requested the clocks to
-		 * be disbaled on a later timestamp */
-		if (kgsl_add_event(device, id, iommu->iommu_last_cmd_ts,
-			kgsl_iommu_clk_disable_event, mmu, mmu)) {
-				KGSL_DRV_ERR(device,
-				"Failed to add IOMMU disable clk event\n");
-				iommu->clk_event_queued = false;
-		}
-	}
+	/* Free param, we are done using it */
+	kfree(param);
 }
 
 /*
@@ -526,6 +511,7 @@
  * @ts_valid - Indicates whether ts parameter is valid, if this parameter
  * is false then it means that the calling function wants to disable the
  * IOMMU clocks immediately without waiting for any timestamp
+ * @unit: IOMMU unit for which clocks are to be turned off
  *
  * Creates an event to disable the IOMMU clocks on timestamp and if event
  * already exists then updates the timestamp of disabling the IOMMU clocks
@@ -534,84 +520,88 @@
  * Return - void
  */
 static void
-kgsl_iommu_disable_clk_on_ts(struct kgsl_mmu *mmu, unsigned int ts,
-				bool ts_valid)
+kgsl_iommu_disable_clk_on_ts(struct kgsl_mmu *mmu,
+				unsigned int ts, int unit)
 {
-	struct kgsl_iommu *iommu = mmu->priv;
+	struct kgsl_iommu_disable_clk_param *param;
 
-	if (iommu->clk_event_queued) {
-		if (ts_valid && (0 <
-			timestamp_cmp(ts, iommu->iommu_last_cmd_ts)))
-			iommu->iommu_last_cmd_ts = ts;
-	} else {
-		if (ts_valid) {
-			iommu->iommu_last_cmd_ts = ts;
-			iommu->clk_event_queued = true;
-			if (kgsl_add_event(mmu->device, KGSL_MEMSTORE_GLOBAL,
-				ts, kgsl_iommu_clk_disable_event, mmu, mmu)) {
-				KGSL_DRV_ERR(mmu->device,
-				"Failed to add IOMMU disable clk event\n");
-				iommu->clk_event_queued = false;
-			}
-		} else {
-			kgsl_iommu_disable_clk(mmu);
-		}
+	param = kzalloc(sizeof(*param), GFP_KERNEL);
+	if (!param) {
+		KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*param));
+		return;
 	}
+	param->mmu = mmu;
+	param->unit = unit;
+	param->ts = ts;
+
+	if (kgsl_add_event(mmu->device, KGSL_MEMSTORE_GLOBAL,
+			ts, kgsl_iommu_clk_disable_event, param, mmu)) {
+		KGSL_DRV_ERR(mmu->device,
+			"Failed to add IOMMU disable clk event\n");
+		kfree(param);
+	}
+}
+
+/*
+ * kgsl_iommu_clk_prepare_enable - Enable an iommu clock
+ * @clk - clock to enable
+ *
+ * Prepare and enable the clock, making up to four attempts before
+ * giving up and returning an error.
+ * Return: 0 on success else 1 on error
+ */
+
+static int kgsl_iommu_clk_prepare_enable(struct clk *clk)
+{
+	int num_retries = 4;
+
+	while (num_retries--) {
+		if (!clk_prepare_enable(clk))
+			return 0;
+	}
+
+	return 1;
 }
 
 /*
  * kgsl_iommu_enable_clk - Enable iommu clocks
  * @mmu - Pointer to mmu structure
- * @ctx_id - The context bank whose clocks are to be turned on
+ * @unit - The iommu unit whose clocks are to be turned on
  *
- * Enables iommu clocks of a given context
+ * Enables iommu clocks of a given iommu unit
  * Return: 0 on success else error code
  */
-static int kgsl_iommu_enable_clk(struct kgsl_mmu *mmu,
-				int ctx_id)
+static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu,
+				int unit)
 {
-	int ret = 0;
 	int i, j;
 	struct kgsl_iommu *iommu = mmu->priv;
-	struct msm_iommu_drvdata *iommu_drvdata;
 
 	for (i = 0; i < iommu->unit_count; i++) {
 		struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
-		for (j = 0; j < iommu_unit->dev_count; j++) {
-			if (iommu_unit->dev[j].clk_enabled ||
-				ctx_id != iommu_unit->dev[j].ctx_id)
-				continue;
-			iommu_drvdata =
-			dev_get_drvdata(iommu_unit->dev[j].dev->parent);
-			ret = clk_prepare_enable(iommu_drvdata->pclk);
-			if (ret)
-				goto done;
-			if (iommu_drvdata->clk) {
-				ret = clk_prepare_enable(iommu_drvdata->clk);
-				if (ret) {
-					clk_disable_unprepare(
-						iommu_drvdata->pclk);
-					goto done;
-				}
-			}
-			if (iommu_drvdata->aclk) {
-				ret = clk_prepare_enable(iommu_drvdata->aclk);
-				if (ret) {
-					if (iommu_drvdata->clk)
-						clk_disable_unprepare(
-							iommu_drvdata->clk);
-					clk_disable_unprepare(
-							iommu_drvdata->pclk);
-					goto done;
-				}
-			}
-			iommu_unit->dev[j].clk_enabled = true;
+
+		/* Turn on the clks for the requested IOMMU unit */
+		if ((unit != i) && (unit != KGSL_IOMMU_MAX_UNITS))
+			continue;
+
+		for (j = 0; j < KGSL_IOMMU_MAX_CLKS; j++) {
+			if (iommu_unit->clks[j])
+				if (kgsl_iommu_clk_prepare_enable(
+						iommu_unit->clks[j]))
+						goto done;
 		}
+		atomic_inc(&iommu_unit->clk_enable_count);
 	}
+	return;
 done:
-	if (ret)
-		kgsl_iommu_disable_clk(mmu);
-	return ret;
+	/*
+	 * Any clock enable failure should be fatal: the system usually
+	 * crashes soon after a clock fails to enable, so BUG() here to
+	 * catch the system in a bad state for further debugging.
+	 */
+	KGSL_CORE_ERR("IOMMU clk enable failed\n");
+	BUG();
 }
 
 /*
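The rework above replaces the per-context clk_enabled flags with a per-unit clk_enable_count: every enable bumps the count, every disable drops it, and the BUG_ON on a negative count flags an unbalanced caller immediately. The pairing in miniature, as a sketch:

/* Sketch of the balanced pairing behind clk_enable_count: the atomic
 * counter exists to catch callers that disable more than they enable. */
static void unit_clks_get(atomic_t *count, struct clk *clk)
{
	WARN_ON(clk_prepare_enable(clk));
	atomic_inc(count);
}

static void unit_clks_put(atomic_t *count, struct clk *clk)
{
	BUG_ON(atomic_dec_return(count) < 0);
	clk_disable_unprepare(clk);
}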
@@ -766,6 +756,7 @@
 {
 	struct kgsl_iommu_pt *iommu_pt;
 	struct kgsl_iommu *iommu = mmu->priv;
+	struct msm_iommu_drvdata *drvdata = NULL;
 	int i, j, ret = 0;
 
 	/*
@@ -797,6 +788,14 @@
 				"iommu pt %p attached to dev %p, ctx_id %d\n",
 				iommu_pt->domain, iommu_unit->dev[j].dev,
 				iommu_unit->dev[j].ctx_id);
+				/* Init IOMMU unit clks here */
+				if (!drvdata) {
+					drvdata = dev_get_drvdata(
+					iommu_unit->dev[j].dev->parent);
+					iommu_unit->clks[0] = drvdata->pclk;
+					iommu_unit->clks[1] = drvdata->clk;
+					iommu_unit->clks[2] = drvdata->aclk;
+				}
 			}
 		}
 	}
@@ -822,6 +821,7 @@
 	struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[unit_id];
 	int i, j;
 	int found_ctx;
+	int ret = 0;
 
 	for (j = 0; j < KGSL_IOMMU_MAX_DEVS_PER_UNIT; j++) {
 		found_ctx = 0;
@@ -835,16 +835,22 @@
 			break;
 		if (!data->iommu_ctxs[i].iommu_ctx_name) {
 			KGSL_CORE_ERR("Context name invalid\n");
-			return -EINVAL;
+			ret = -EINVAL;
+			goto done;
 		}
+		atomic_set(&(iommu_unit->clk_enable_count), 0);
 
 		iommu_unit->dev[iommu_unit->dev_count].dev =
 			msm_iommu_get_ctx(data->iommu_ctxs[i].iommu_ctx_name);
-		if (iommu_unit->dev[iommu_unit->dev_count].dev == NULL) {
-			KGSL_CORE_ERR("Failed to get iommu dev handle for "
-			"device %s\n", data->iommu_ctxs[i].iommu_ctx_name);
-			return -EINVAL;
+		if (NULL == iommu_unit->dev[iommu_unit->dev_count].dev)
+			ret = -EINVAL;
+		if (IS_ERR(iommu_unit->dev[iommu_unit->dev_count].dev)) {
+			ret = PTR_ERR(
+				iommu_unit->dev[iommu_unit->dev_count].dev);
+			iommu_unit->dev[iommu_unit->dev_count].dev = NULL;
 		}
+		if (ret)
+			goto done;
 		iommu_unit->dev[iommu_unit->dev_count].ctx_id =
 						data->iommu_ctxs[i].ctx_id;
 		iommu_unit->dev[iommu_unit->dev_count].kgsldev = mmu->device;
@@ -856,12 +862,23 @@
 
 		iommu_unit->dev_count++;
 	}
-	if (!j) {
-		KGSL_CORE_ERR("No ctxts initialized, user ctxt absent\n ");
-		return -EINVAL;
+done:
+	if (!iommu_unit->dev_count && !ret)
+		ret = -EINVAL;
+	if (ret) {
+		/*
+		 * If at least the first context is initialized on v1
+		 * then we can continue
+		 */
+		if (!msm_soc_version_supports_iommu_v1() &&
+			iommu_unit->dev_count)
+			ret = 0;
+		else
+			KGSL_CORE_ERR(
+			"Failed to initialize iommu contexts, err: %d\n", ret);
 	}
 
-	return 0;
+	return ret;
 }
 
 /*
@@ -924,6 +941,17 @@
 		!kgsl_mmu_is_perprocess(mmu))
 		return status;
 
+	/*
+	 * For 2D devices a cpu-side sync lock is required. For the 3D
+	 * device, since we only have a single 3D core and we always ensure
+	 * that it is idle while writing to the IOMMU registers from the
+	 * CPU, this lock is not required.
+	 */
+	if (KGSL_DEVICE_2D0 == mmu->device->id ||
+		KGSL_DEVICE_2D1 == mmu->device->id) {
+		return status;
+	}
+
 	/* Return if already initialized */
 	if (iommu->sync_lock_initialized)
 		return status;
@@ -993,6 +1021,10 @@
 	*cmds++ = 0x1;
 	*cmds++ = 0x1;
 
+	/* WAIT_REG_MEM turns back on protected mode - push it off */
+	*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
 	*cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
 	*cmds++ = lock_vars->turn;
 	*cmds++ = 0;
@@ -1007,11 +1039,19 @@
 	*cmds++ = 0x1;
 	*cmds++ = 0x1;
 
+	/* WAIT_REG_MEM turns back on protected mode - push it off */
+	*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
 	*cmds++ = cp_type3_packet(CP_TEST_TWO_MEMS, 3);
 	*cmds++ = lock_vars->flag[PROC_APPS];
 	*cmds++ = lock_vars->turn;
 	*cmds++ = 0;
 
+	/* TEST_TWO_MEMS turns back on protected mode - push it off */
+	*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
 	cmds += adreno_add_idle_cmds(adreno_dev, cmds);
 
 	return cmds - start;
@@ -1049,6 +1089,10 @@
 	*cmds++ = 0x1;
 	*cmds++ = 0x1;
 
+	/* WAIT_REG_MEM turns back on protected mode - push it off */
+	*cmds++ = cp_type3_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
 	cmds += adreno_add_idle_cmds(adreno_dev, cmds);
 
 	return cmds - start;
@@ -1631,16 +1675,9 @@
 		mmu->hwpagetable = NULL;
 		goto done;
 	}
-	status = kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER);
-	if (status) {
-		KGSL_CORE_ERR("clk enable failed\n");
-		goto done;
-	}
-	status = kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_PRIV);
-	if (status) {
-		KGSL_CORE_ERR("clk enable failed\n");
-		goto done;
-	}
+
+	kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
+
 	/* Get the lsb value of pagetables set in the IOMMU ttbr0 register as
 	 * that value should not change when we change pagetables, so while
 	 * changing pagetables we can use this lsb value of the pagetable w/o
@@ -1692,14 +1729,10 @@
 				KGSL_IOMMU_SETSTATE_NOP_OFFSET,
 				cp_nop_packet(1), sizeof(unsigned int));
 
-	kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
+	kgsl_iommu_disable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 	mmu->flags |= KGSL_FLAGS_STARTED;
 
 done:
-	if (status) {
-		kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
-		kgsl_detach_pagetable_iommu_domain(mmu);
-	}
 	return status;
 }
 
@@ -1708,9 +1741,11 @@
 		struct kgsl_memdesc *memdesc,
 		unsigned int *tlb_flags)
 {
-	int ret;
+	int ret = 0, lock_taken = 0;
 	unsigned int range = memdesc->size;
 	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	struct kgsl_device *device = pt->mmu->device;
+	struct kgsl_iommu *iommu = pt->mmu->priv;
 
 	/* All GPU addresses as assigned are page aligned, but some
 	   functions perturb the gpuaddr with an offset, so apply the
@@ -1725,18 +1760,38 @@
 		range += PAGE_SIZE;
 
 	ret = iommu_unmap_range(iommu_pt->domain, gpuaddr, range);
-	if (ret)
+	if (ret) {
 		KGSL_CORE_ERR("iommu_unmap_range(%p, %x, %d) failed "
 			"with err: %d\n", iommu_pt->domain, gpuaddr,
 			range, ret);
+		return ret;
+	}
 
 	/*
-	 * Flushing only required if per process pagetables are used. With
-	 * global case, flushing will happen inside iommu_map function
+	 * Check to see if the current thread already holds the device mutex.
+	 * If it does not, then take the device mutex which is required for
+	 * flushing the tlb
 	 */
-	if (!ret && kgsl_mmu_is_perprocess(pt->mmu))
-		*tlb_flags = UINT_MAX;
-	return 0;
+	if (!mutex_is_locked(&device->mutex) ||
+		device->mutex.owner != current) {
+		mutex_lock(&device->mutex);
+		lock_taken = 1;
+	}
+
+	/*
+	 * Flush the tlb only if the iommu device is attached and the pagetable
+	 * hasn't been switched yet
+	 */
+	if (kgsl_mmu_is_perprocess(pt->mmu) &&
+		iommu->iommu_units[0].dev[KGSL_IOMMU_CONTEXT_USER].attached &&
+		kgsl_iommu_pt_equal(pt->mmu, pt,
+		kgsl_iommu_get_current_ptbase(pt->mmu)))
+		kgsl_iommu_default_setstate(pt->mmu, KGSL_MMUFLAGS_TLBFLUSH);
+
+	if (lock_taken)
+		mutex_unlock(&device->mutex);
+
+	return ret;
 }
 
 static int
@@ -1786,12 +1841,12 @@
 	int i, j;
 
 	if (atomic_read(&mmu->fault)) {
+		kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 		for (i = 0; i < iommu->unit_count; i++) {
 			struct kgsl_iommu_unit *iommu_unit =
 						&iommu->iommu_units[i];
 			for (j = 0; j < iommu_unit->dev_count; j++) {
 				if (iommu_unit->dev[j].fault) {
-					kgsl_iommu_enable_clk(mmu, j);
 					_iommu_lock();
 					KGSL_IOMMU_SET_CTX_REG(iommu,
 						iommu_unit,
@@ -1806,6 +1861,7 @@
 				}
 			}
 		}
+		kgsl_iommu_disable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 		atomic_set(&mmu->fault, 0);
 	}
 }
@@ -1813,7 +1869,6 @@
 
 static void kgsl_iommu_stop(struct kgsl_mmu *mmu)
 {
-	struct kgsl_iommu *iommu = mmu->priv;
 	/*
 	 *  stop device mmu
 	 *
@@ -1829,9 +1884,7 @@
 		kgsl_iommu_pagefault_resume(mmu);
 	}
 	/* switch off MMU clocks and cancel any events it has queued */
-	iommu->clk_event_queued = false;
 	kgsl_cancel_events(mmu->device, mmu);
-	kgsl_iommu_disable_clk(mmu);
 }
 
 static int kgsl_iommu_close(struct kgsl_mmu *mmu)
@@ -1880,11 +1933,11 @@
 	if (in_interrupt())
 		return 0;
 	/* Return the current pt base by reading IOMMU pt_base register */
-	kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER);
+	kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 	pt_base = KGSL_IOMMU_GET_CTX_REG(iommu, (&iommu->iommu_units[0]),
 					KGSL_IOMMU_CONTEXT_USER,
 					TTBR0);
-	kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
+	kgsl_iommu_disable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 	return pt_base & KGSL_IOMMU_CTX_TTBR0_ADDR_MASK;
 }
 
@@ -1907,16 +1960,11 @@
 	int temp;
 	int i;
 	int ret = 0;
-	unsigned int pt_base = kgsl_iommu_get_pt_base_addr(mmu,
+	phys_addr_t pt_base = kgsl_iommu_get_pt_base_addr(mmu,
 						mmu->hwpagetable);
 	phys_addr_t pt_val;
 
-	ret = kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER);
-
-	if (ret) {
-		KGSL_DRV_ERR(mmu->device, "Failed to enable iommu clocks\n");
-		return ret;
-	}
+	kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
 
 	/* For v0 SMMU GPU needs to be idle for tlb invalidate as well */
 	/* naming mismatch for iommu */
@@ -2003,7 +2051,8 @@
 	msm_iommu_unlock();
 
 	/* Disable smmu clock */
-	kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
+	kgsl_iommu_disable_clk(mmu, KGSL_IOMMU_MAX_UNITS);
+
 	return ret;
 }
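The timestamp-deferred disable now heap-allocates a small parameter block and hands ownership to the event callback, which frees it on the success path; when kgsl_add_event() fails, ownership never transfers and the caller frees it instead, so the block is released exactly once either way. The ownership shape, with a stand-in queue function:

struct deferred_clk_off {
	int unit;
	unsigned int ts;
};

/* Callback owns the payload and frees it after acting on it. */
static void deferred_clk_off_cb(void *data)
{
	struct deferred_clk_off *p = data;

	/* ... disable clocks for p->unit at timestamp p->ts ... */
	kfree(p);
}

/* Sketch: queue_fn() stands in for kgsl_add_event(). On queue failure
 * ownership stays with us, so we free the payload here instead. */
static int schedule_clk_off(int unit, unsigned int ts,
		int (*queue_fn)(void (*cb)(void *), void *data))
{
	struct deferred_clk_off *p = kzalloc(sizeof(*p), GFP_KERNEL);

	if (!p)
		return -ENOMEM;

	p->unit = unit;
	p->ts = ts;

	if (queue_fn(deferred_clk_off_cb, p)) {
		kfree(p);
		return -EINVAL;
	}
	return 0;
}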
 
diff --git a/drivers/gpu/msm2/kgsl_iommu.h b/drivers/gpu/msm2/kgsl_iommu.h
index 7dca40e..2ff665a 100644
--- a/drivers/gpu/msm2/kgsl_iommu.h
+++ b/drivers/gpu/msm2/kgsl_iommu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -91,10 +91,16 @@
  * Max number of iommu units that the gpu core can have
  * On APQ8064, KGSL can control a maximum of 2 IOMMU units.
  */
-#define KGSL_IOMMU_MAX_UNITS 2
+enum kgsl_iommu_units {
+	KGSL_IOMMU_UNIT_0 = 0,
+	KGSL_IOMMU_UNIT_1 = 1,
+	KGSL_IOMMU_MAX_UNITS = 2,
+};
 
 /* Max number of iommu contexts per IOMMU unit */
 #define KGSL_IOMMU_MAX_DEVS_PER_UNIT 2
+/* Max number of iommu clks per IOMMU unit */
+#define KGSL_IOMMU_MAX_CLKS 3
 
 /* Macros to read/write IOMMU registers */
 #define KGSL_IOMMU_SET_CTX_REG_LL(iommu, iommu_unit, ctx, REG, val)	\
@@ -167,6 +173,8 @@
  * @iommu_halt_enable: Valid only on IOMMU-v1, when set indicates that the iommu
  * unit supports halting of the IOMMU, which can be enabled while programming
  * the IOMMU registers for synchronization
+ * @clk_enable_count: The ref count of clock enable calls
+ * @clks: iommu unit clks
  */
 struct kgsl_iommu_unit {
 	struct kgsl_iommu_device dev[KGSL_IOMMU_MAX_DEVS_PER_UNIT];
@@ -174,6 +182,8 @@
 	struct kgsl_memdesc reg_map;
 	unsigned int ahb_base;
 	int iommu_halt_enable;
+	atomic_t clk_enable_count;
+	struct clk *clks[KGSL_IOMMU_MAX_CLKS];
 };
 
 /*
@@ -182,10 +192,6 @@
  * iommu contexts owned by graphics cores
  * @unit_count: Number of IOMMU units that are available for this
  * instance of the IOMMU driver
- * @iommu_last_cmd_ts: The timestamp of last command submitted that
- * aceeses iommu registers
- * @clk_event_queued: Indicates whether an event to disable clocks
- * is already queued or not
  * @device: Pointer to kgsl device
  * @ctx_offset: The context offset to be added to base address when
  * accessing IOMMU registers
@@ -201,8 +207,6 @@
 struct kgsl_iommu {
 	struct kgsl_iommu_unit iommu_units[KGSL_IOMMU_MAX_UNITS];
 	unsigned int unit_count;
-	unsigned int iommu_last_cmd_ts;
-	bool clk_event_queued;
 	struct kgsl_device *device;
 	unsigned int ctx_offset;
 	struct kgsl_iommu_register_list *iommu_reg_list;
@@ -222,4 +226,18 @@
 	struct kgsl_iommu *iommu;
 };
 
+/*
+ * struct kgsl_iommu_disable_clk_param - Parameter struct for disable clk event
+ * @mmu: The mmu pointer
+ * @rb_level: the ringbuffer level to which the event's timestamp belongs
+ * @unit: The IOMMU unit whose clock is to be turned off
+ * @ts: Timestamp on which clock is to be disabled
+ */
+struct kgsl_iommu_disable_clk_param {
+	struct kgsl_mmu *mmu;
+	int rb_level;
+	int unit;
+	unsigned int ts;
+};
+
 #endif
diff --git a/drivers/gpu/msm2/kgsl_log.h b/drivers/gpu/msm2/kgsl_log.h
index 81a35e0..f90627e 100644
--- a/drivers/gpu/msm2/kgsl_log.h
+++ b/drivers/gpu/msm2/kgsl_log.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2011,2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -13,8 +13,6 @@
 #ifndef __KGSL_LOG_H
 #define __KGSL_LOG_H
 
-extern unsigned int kgsl_cff_dump_enable;
-
 #define KGSL_LOG_INFO(dev, lvl, fmt, args...) \
 	do { \
 		if ((lvl) >= 6)  \
diff --git a/drivers/gpu/msm2/kgsl_mmu.c b/drivers/gpu/msm2/kgsl_mmu.c
index 1910a46..d987c3d 100644
--- a/drivers/gpu/msm2/kgsl_mmu.c
+++ b/drivers/gpu/msm2/kgsl_mmu.c
@@ -20,6 +20,7 @@
 #include <linux/iommu.h>
 #include <mach/iommu.h>
 #include <mach/socinfo.h>
+#include <linux/types.h>
 
 #include "kgsl.h"
 #include "kgsl_mmu.h"
@@ -348,7 +349,7 @@
 	unsigned int ret = 0;
 
 	if (!mmu->mmu_ops || !mmu->mmu_ops->mmu_pt_equal)
-		return KGSL_MMU_GLOBAL_PT;
+		return 0;
 	spin_lock(&kgsl_driver.ptlock);
 	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
 		if (kref_get_unless_zero(&pt->refcount)) {
@@ -385,12 +386,18 @@
 	status = kgsl_allocate_contiguous(&mmu->setstate_memory, PAGE_SIZE);
 	if (status)
 		return status;
+
+	/* Mark the setstate memory as read only */
+	mmu->setstate_memory.flags |= KGSL_MEMFLAGS_GPUREADONLY;
+
 	kgsl_sharedmem_set(device, &mmu->setstate_memory, 0, 0,
 				mmu->setstate_memory.size);
 
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type) {
 		dev_info(device->dev, "|%s| MMU type set for device is "
 				"NOMMU\n", __func__);
+		status = dma_set_coherent_mask(device->dev->parent,
+					DMA_BIT_MASK(sizeof(dma_addr_t)*8));
 		goto done;
 	} else if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type)
 		mmu->mmu_ops = &gpummu_ops;
@@ -730,6 +737,10 @@
 	if (!kgsl_memdesc_is_global(memdesc) &&
 		(KGSL_MEMDESC_MAPPED & memdesc->priv))
 		return -EINVAL;
+
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
+		return 0;
+
 	/* Add space for the guard page when allocating the mmu VA. */
 	size = memdesc->size;
 	if (kgsl_memdesc_has_guard_page(memdesc))
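Padding the VA reservation by one page means a GPU-side overrun lands on an unbacked guard page and faults, instead of silently corrupting whatever buffer happens to be mapped next. The sizing rule by itself, as a sketch:

/* Sketch: the reserved GPU VA window is one page larger than the
 * backing allocation when a guard page is requested, so the first
 * byte past the buffer is unmapped and any access to it faults. */
static unsigned int mmu_va_size(unsigned int size, bool guard_page)
{
	return guard_page ? size + PAGE_SIZE : size;
}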
diff --git a/drivers/gpu/msm2/kgsl_mmu.h b/drivers/gpu/msm2/kgsl_mmu.h
index de6bafc..5fcc6f4 100644
--- a/drivers/gpu/msm2/kgsl_mmu.h
+++ b/drivers/gpu/msm2/kgsl_mmu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -148,11 +148,12 @@
 	void (*mmu_pagefault_resume)
 			(struct kgsl_mmu *mmu);
 	void (*mmu_disable_clk_on_ts)
-		(struct kgsl_mmu *mmu, uint32_t ts, bool ts_valid);
-	int (*mmu_enable_clk)
-		(struct kgsl_mmu *mmu, int ctx_id);
+		(struct kgsl_mmu *mmu,
+		uint32_t ts, int unit);
+	void (*mmu_enable_clk)
+		(struct kgsl_mmu *mmu, int unit);
 	void (*mmu_disable_clk)
-		(struct kgsl_mmu *mmu);
+		(struct kgsl_mmu *mmu, int unit);
 	phys_addr_t (*mmu_get_default_ttbr0)(struct kgsl_mmu *mmu,
 				unsigned int unit_id,
 				enum kgsl_iommu_context_id ctx_id);
@@ -321,26 +322,25 @@
 		return 0;
 }
 
-static inline int kgsl_mmu_enable_clk(struct kgsl_mmu *mmu,
-					int ctx_id)
+static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu, int unit)
 {
 	if (mmu->mmu_ops && mmu->mmu_ops->mmu_enable_clk)
-		return mmu->mmu_ops->mmu_enable_clk(mmu, ctx_id);
+		mmu->mmu_ops->mmu_enable_clk(mmu, unit);
 	else
-		return 0;
+		return;
 }
 
-static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu)
+static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu, int unit)
 {
 	if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk)
-		mmu->mmu_ops->mmu_disable_clk(mmu);
+		mmu->mmu_ops->mmu_disable_clk(mmu, unit);
 }
 
 static inline void kgsl_mmu_disable_clk_on_ts(struct kgsl_mmu *mmu,
-						unsigned int ts, bool ts_valid)
+						unsigned int ts, int unit)
 {
 	if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk_on_ts)
-		mmu->mmu_ops->mmu_disable_clk_on_ts(mmu, ts, ts_valid);
+		mmu->mmu_ops->mmu_disable_clk_on_ts(mmu, ts, unit);
 }
 
 static inline unsigned int kgsl_mmu_get_int_mask(void)
diff --git a/drivers/gpu/msm2/kgsl_pwrctrl.c b/drivers/gpu/msm2/kgsl_pwrctrl.c
index 7ee305f..e7eaa93 100644
--- a/drivers/gpu/msm2/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm2/kgsl_pwrctrl.c
@@ -766,12 +766,12 @@
 DEVICE_ATTR(num_pwrlevels, 0444,
 	kgsl_pwrctrl_num_pwrlevels_show,
 	NULL);
-DEVICE_ATTR(reset_count, 0444,
-	kgsl_pwrctrl_reset_count_show,
-	NULL);
 DEVICE_ATTR(pmqos_latency, 0644,
 	kgsl_pwrctrl_pmqos_latency_show,
 	kgsl_pwrctrl_pmqos_latency_store);
+DEVICE_ATTR(reset_count, 0444,
+	kgsl_pwrctrl_reset_count_show,
+	NULL);
 DEVICE_ATTR(force_clk_on, 0644,
 	kgsl_pwrctrl_force_clk_on_show,
 	kgsl_pwrctrl_force_clk_on_store);
@@ -793,8 +793,8 @@
 	&dev_attr_min_pwrlevel,
 	&dev_attr_thermal_pwrlevel,
 	&dev_attr_num_pwrlevels,
-	&dev_attr_reset_count,
 	&dev_attr_pmqos_latency,
+	&dev_attr_reset_count,
 	&dev_attr_force_clk_on,
 	&dev_attr_force_bus_on,
 	&dev_attr_force_rail_on,
@@ -1332,8 +1332,6 @@
 		break;
 	}
 
-	kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
-
 	return 0;
 }
 
@@ -1383,7 +1381,6 @@
 		break;
 	case KGSL_STATE_SLEEP:
 		status = _sleep(device);
-		kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
 		break;
 	case KGSL_STATE_SLUMBER:
 		status = _slumber(device);
@@ -1434,6 +1431,7 @@
 			kgsl_pwrstate_to_str(state),
 			context ? context->id : -1, ts_processed);
 		kgsl_context_put(context);
+
 		/* fall through */
 	case KGSL_STATE_NAP:
 		/* Turn on the core clocks */
@@ -1464,7 +1462,10 @@
 	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
 	/* Order pwrrail/clk sequence based upon platform */
 	kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON);
-	kgsl_pwrctrl_pwrlevel_change(device, pwr->default_pwrlevel);
+
+	if (pwr->constraint.type == KGSL_CONSTRAINT_NONE)
+		kgsl_pwrctrl_pwrlevel_change(device, pwr->default_pwrlevel);
+
 	kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE);
 	kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
 }
@@ -1537,13 +1538,11 @@
 	int ret = 0;
 	BUG_ON(!mutex_is_locked(&device->mutex));
 
-	if (atomic_read(&device->active_cnt) == 0) {
-		if (device->requested_state == KGSL_STATE_SUSPEND ||
-				device->state == KGSL_STATE_SUSPEND) {
-			mutex_unlock(&device->mutex);
-			wait_for_completion(&device->hwaccess_gate);
-			mutex_lock(&device->mutex);
-		}
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		(device->state != KGSL_STATE_ACTIVE)) {
+		mutex_unlock(&device->mutex);
+		wait_for_completion(&device->hwaccess_gate);
+		mutex_lock(&device->mutex);
 
 		ret = kgsl_pwrctrl_wake(device);
 	}
@@ -1596,12 +1595,9 @@
 
 	if (atomic_dec_and_test(&device->active_cnt)) {
 		if (device->state == KGSL_STATE_ACTIVE &&
-				 device->requested_state == KGSL_STATE_NONE) {
+			device->requested_state == KGSL_STATE_NONE) {
 			kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
-			if (kgsl_pwrctrl_sleep(device)) {
-				kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
-				queue_work(device->work_queue, &device->idle_check_ws);
-			}
+			queue_work(device->work_queue, &device->idle_check_ws);
 		}
 
 		mod_timer(&device->idle_timer,
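
Note on the last two hunks: kgsl_active_count_get() now waits on the hwaccess gate whenever the device is not ACTIVE (not only around SUSPEND), and kgsl_active_count_put() no longer calls into the sleep path directly. A sketch of the resulting idle flow, assuming the existing idle_check worker performs the actual transition:

	/* last active reference dropped:
	 *   kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
	 *   queue_work(device->work_queue, &device->idle_check_ws);
	 * the worker later executes the NAP/SLEEP transition, so the
	 * caller never blocks inside the power path itself. */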
diff --git a/drivers/gpu/msm2/kgsl_pwrctrl.h b/drivers/gpu/msm2/kgsl_pwrctrl.h
index 9f18160..ece58fa 100644
--- a/drivers/gpu/msm2/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm2/kgsl_pwrctrl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2014, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -27,6 +27,18 @@
 
 #define KGSL_MAX_CLKS 6
 
+/* Only two supported levels, min & max */
+#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2
+
+/* Symbolic table for the constraint type */
+#define KGSL_CONSTRAINT_TYPES \
+	{ KGSL_CONSTRAINT_NONE, "None" }, \
+	{ KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" }
+/* Symbolic table for the constraint sub type */
+#define KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES \
+	{ KGSL_CONSTRAINT_PWR_MIN, "Min" }, \
+	{ KGSL_CONSTRAINT_PWR_MAX, "Max" }
+
 struct platform_device;
 
 struct kgsl_clk_stats {
@@ -40,6 +52,18 @@
 	unsigned int elapsed_old;
 };
 
+struct kgsl_pwr_constraint {
+	unsigned int type;
+	unsigned int sub_type;
+	union {
+		struct {
+			unsigned int level;
+		} pwrlevel;
+	} hint;
+	unsigned long expires;
+	uint32_t owner_id;
+};
+
 /**
  * struct kgsl_pwrctrl - Power control settings for a KGSL device
  * @interrupt_num - The interrupt number for the device
@@ -65,6 +89,7 @@
  * @pm_qos_req_dma - the power management quality of service structure
  * @pm_qos_latency - allowed CPU latency in microseconds
  * @step_mul - multiplier for moving between power levels
+ * @constraint - currently active power constraint
  */
 
 struct kgsl_pwrctrl {
@@ -94,6 +119,7 @@
 	unsigned int pm_qos_latency;
 	unsigned int step_mul;
 	unsigned int irq_last;
+	struct kgsl_pwr_constraint constraint;
 };
 
 void kgsl_pwrctrl_irq(struct kgsl_device *device, int state);
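
For reference, this is how the new structure is meant to be populated for a pwrlevel constraint; the field values are illustrative, and the KGSL_CONSTRAINT_* names come from the msm_kgsl.h hunk later in this patch:

	struct kgsl_pwr_constraint c = {
		.type = KGSL_CONSTRAINT_PWRLEVEL,
		.sub_type = KGSL_CONSTRAINT_PWR_MAX,
		.hint.pwrlevel.level = 0,	/* fastest pwrlevel */
		.expires = jiffies + HZ / 10,	/* illustrative lifetime */
		.owner_id = context->id,
	};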
diff --git a/drivers/gpu/msm2/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm2/kgsl_pwrscale_trustzone.c
index 40649d2..1119dfa 100644
--- a/drivers/gpu/msm2/kgsl_pwrscale_trustzone.c
+++ b/drivers/gpu/msm2/kgsl_pwrscale_trustzone.c
@@ -22,6 +22,7 @@
 #include "kgsl.h"
 #include "kgsl_pwrscale.h"
 #include "kgsl_device.h"
+#include "kgsl_trace.h"
 
 #define TZ_GOVERNOR_PERFORMANCE 0
 #define TZ_GOVERNOR_ONDEMAND    1
@@ -132,7 +133,8 @@
 	struct tz_priv *priv = pwrscale->priv;
 	if (device->state != KGSL_STATE_NAP &&
 		priv->governor == TZ_GOVERNOR_ONDEMAND)
-		kgsl_pwrctrl_pwrlevel_change(device,
+		if (device->pwrctrl.constraint.type == KGSL_CONSTRAINT_NONE)
+			kgsl_pwrctrl_pwrlevel_change(device,
 					device->pwrctrl.default_pwrlevel);
 }
 
@@ -159,11 +161,13 @@
 		(priv->bin.total_time < FLOOR))
 		return;
 
-	/* If there is an extended block of busy processing,
-	 * increase frequency.  Otherwise run the normal algorithm.
+	/* If there is an extended block of busy processing, set
+	 * frequency to turbo.  Otherwise run the normal algorithm.
 	 */
 	if (priv->bin.busy_time > CEILING) {
-		val = -1;
+		val = 0;
+		kgsl_pwrctrl_pwrlevel_change(device,
+				KGSL_PWRLEVEL_TURBO);
 	} else if (priv->idle_dcvs) {
 		idle = priv->bin.total_time - priv->bin.busy_time;
 		idle = (idle > 0) ? idle : 0;
@@ -187,9 +191,21 @@
 	 */
 	if (val > 0)
 		val *= pwr->step_mul;
-	if (val)
+
+	if ((pwr->constraint.type == KGSL_CONSTRAINT_NONE) ||
+			(time_after(jiffies, pwr->constraint.expires))) {
+
 		kgsl_pwrctrl_pwrlevel_change(device,
 					     pwr->active_pwrlevel + val);
+		if (pwr->constraint.type != KGSL_CONSTRAINT_NONE) {
+			/* Trace the constraint being unset by the driver */
+			trace_kgsl_constraint(device,
+				pwr->constraint.type,
+				pwr->active_pwrlevel, 0);
+			/* Invalidate the constraint set */
+			pwr->constraint.type = KGSL_CONSTRAINT_NONE;
+		}
+	}
 }
 
 static void tz_busy(struct kgsl_device *device,
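
With this change the trustzone governor adjusts the power level only when no constraint is active or the active one has expired; on expiry it emits a kgsl_constraint trace with on=0 and resets the type. As a worked example: arm a constraint with expires = jiffies + HZ/10, and every tz_update() call within the next 100 ms leaves active_pwrlevel untouched; the first call after that applies the normal DCVS adjustment and invalidates the constraint.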
diff --git a/drivers/gpu/msm2/kgsl_sharedmem.c b/drivers/gpu/msm2/kgsl_sharedmem.c
index 0d6f202..8d8e963 100755
--- a/drivers/gpu/msm2/kgsl_sharedmem.c
+++ b/drivers/gpu/msm2/kgsl_sharedmem.c
@@ -390,16 +390,6 @@
 	return VM_FAULT_SIGBUS;
 }
 
-static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc)
-{
-	return VM_RESERVED | VM_DONTEXPAND;
-}
-
-static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc)
-{
-	return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
-}
-
 /*
  * kgsl_page_alloc_unmap_kernel() - Unmap the memory in memdesc
  *
@@ -518,53 +508,6 @@
 	return VM_FAULT_NOPAGE;
 }
 
-static void kgsl_ebimem_unmap_kernel(struct kgsl_memdesc *memdesc)
-{
-	mutex_lock(&kernel_map_global_lock);
-	if (!memdesc->hostptr) {
-		BUG_ON(memdesc->hostptr_count);
-		goto done;
-	}
-	memdesc->hostptr_count--;
-	if (memdesc->hostptr_count)
-		goto done;
-
-	iounmap(memdesc->hostptr);
-	memdesc->hostptr = NULL;
-done:
-	mutex_unlock(&kernel_map_global_lock);
-}
-
-static void kgsl_ebimem_free(struct kgsl_memdesc *memdesc)
-
-{
-	kgsl_driver.stats.coherent -= memdesc->size;
-	kgsl_ebimem_unmap_kernel(memdesc);
-	/* we certainly do not expect the hostptr to still be mapped */
-	BUG_ON(memdesc->hostptr);
-
-	free_contiguous_memory_by_paddr(memdesc->physaddr);
-}
-
-static int kgsl_ebimem_map_kernel(struct kgsl_memdesc *memdesc)
-{
-	int ret = 0;
-	mutex_lock(&kernel_map_global_lock);
-	if (!memdesc->hostptr) {
-		memdesc->hostptr = ioremap(memdesc->physaddr, memdesc->size);
-		if (!memdesc->hostptr) {
-			KGSL_CORE_ERR("ioremap failed, addr:0x%p, size:0x%x\n",
-				memdesc->hostptr, memdesc->size);
-			ret = -ENOMEM;
-			goto done;
-		}
-	}
-	memdesc->hostptr_count++;
-done:
-	mutex_unlock(&kernel_map_global_lock);
-	return ret;
-}
-
 static void kgsl_coherent_free(struct kgsl_memdesc *memdesc)
 {
 	kgsl_driver.stats.coherent -= memdesc->size;
@@ -572,22 +515,30 @@
 			  memdesc->hostptr, memdesc->physaddr);
 }
 
+static void kgsl_cma_coherent_free(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->hostptr) {
+		kgsl_driver.stats.coherent -= memdesc->size;
+		dma_free_coherent(memdesc->dev, memdesc->size,
+				memdesc->hostptr, memdesc->physaddr);
+	}
+}
+
 /* Global - also used by kgsl_drm.c */
 struct kgsl_memdesc_ops kgsl_page_alloc_ops = {
 	.free = kgsl_page_alloc_free,
-	.vmflags = kgsl_page_alloc_vmflags,
+	.vmflags = VM_NODUMP | VM_DONTEXPAND | VM_DONTCOPY,
 	.vmfault = kgsl_page_alloc_vmfault,
 	.map_kernel = kgsl_page_alloc_map_kernel,
 	.unmap_kernel = kgsl_page_alloc_unmap_kernel,
 };
 EXPORT_SYMBOL(kgsl_page_alloc_ops);
 
-static struct kgsl_memdesc_ops kgsl_ebimem_ops = {
-	.free = kgsl_ebimem_free,
-	.vmflags = kgsl_contiguous_vmflags,
+/* CMA ops - used during NOMMU mode */
+static struct kgsl_memdesc_ops kgsl_cma_ops = {
+	.free = kgsl_cma_coherent_free,
+	.vmflags = VM_NODUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY,
 	.vmfault = kgsl_contiguous_vmfault,
-	.map_kernel = kgsl_ebimem_map_kernel,
-	.unmap_kernel = kgsl_ebimem_unmap_kernel,
 };
 
 static struct kgsl_memdesc_ops kgsl_coherent_ops = {
@@ -651,7 +602,6 @@
 
 	sglen_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT;
 
-	memdesc->size = size;
 	memdesc->pagetable = pagetable;
 	memdesc->ops = &kgsl_page_alloc_ops;
 
@@ -714,6 +664,14 @@
 				continue;
 			}
 
+			/*
+			 * Update sglen and memdesc size, as the requested
+			 * allocation was not fully served, so that they can
+			 * be correctly freed in kgsl_sharedmem_free().
+			 */
+			memdesc->sglen = sglen;
+			memdesc->size = (size - len);
+
 			KGSL_CORE_ERR(
 				"Out of memory: only allocated %dKB of %dKB requested\n",
 				(size - len) >> 10, size >> 10);
@@ -730,6 +688,7 @@
 	}
 
 	memdesc->sglen = sglen;
+	memdesc->size = size;
 
 	/*
 	 * All memory that goes to the user has to be zeroed out before it gets
@@ -776,15 +735,15 @@
 	outer_cache_range_op_sg(memdesc->sg, memdesc->sglen,
 				KGSL_CACHE_OP_FLUSH);
 
-	KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc,
-		kgsl_driver.stats.page_alloc_max);
-
 	order = get_order(size);
 
 	if (order < 16)
 		kgsl_driver.stats.histogram[order]++;
 
 done:
+	KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.page_alloc,
+		kgsl_driver.stats.page_alloc_max);
+
 	if ((memdesc->sglen_alloc * sizeof(struct page *)) > PAGE_SIZE)
 		vfree(pages);
 	else
@@ -885,77 +844,6 @@
 }
 EXPORT_SYMBOL(kgsl_sharedmem_free);
 
-static int
-_kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc,
-			struct kgsl_pagetable *pagetable, size_t size)
-{
-	int result = 0;
-
-	memdesc->size = size;
-	memdesc->pagetable = pagetable;
-	memdesc->ops = &kgsl_ebimem_ops;
-	memdesc->physaddr = allocate_contiguous_ebi_nomap(size, SZ_8K);
-
-	if (memdesc->physaddr == 0) {
-		KGSL_CORE_ERR("allocate_contiguous_ebi_nomap(%d) failed\n",
-			size);
-		return -ENOMEM;
-	}
-
-	result = memdesc_sg_phys(memdesc, memdesc->physaddr, size);
-
-	if (result)
-		goto err;
-
-	KGSL_STATS_ADD(size, kgsl_driver.stats.coherent,
-		kgsl_driver.stats.coherent_max);
-
-err:
-	if (result)
-		kgsl_sharedmem_free(memdesc);
-
-	return result;
-}
-
-int
-kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc,
-			struct kgsl_pagetable *pagetable,
-			size_t size)
-{
-	size = ALIGN(size, PAGE_SIZE);
-	if (size == 0)
-		return -EINVAL;
-
-	return _kgsl_sharedmem_ebimem(memdesc, pagetable, size);
-}
-EXPORT_SYMBOL(kgsl_sharedmem_ebimem_user);
-
-int
-kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc,
-		struct kgsl_pagetable *pagetable, size_t size)
-{
-	int result;
-	size = ALIGN(size, 8192);
-	if (size == 0)
-		return -EINVAL;
-
-	result = _kgsl_sharedmem_ebimem(memdesc, pagetable, size);
-
-	if (result)
-		return result;
-
-	result = kgsl_ebimem_map_kernel(memdesc);
-
-	if (result) {
-		KGSL_CORE_ERR("hostptr mapping failed\n");
-		kgsl_sharedmem_free(memdesc);
-		return result;
-	}
-
-	return 0;
-}
-EXPORT_SYMBOL(kgsl_sharedmem_ebimem);
-
 int
 kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc,
 			uint32_t *dst,
@@ -1084,3 +972,42 @@
 		snprintf(name, name_size, "unknown(%3d)", type);
 }
 EXPORT_SYMBOL(kgsl_get_memory_usage);
+
+int kgsl_cma_alloc_coherent(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc,
+			struct kgsl_pagetable *pagetable, size_t size)
+{
+	int result = 0;
+
+	if (size == 0)
+		return -EINVAL;
+
+	memdesc->size = size;
+	memdesc->pagetable = pagetable;
+	memdesc->ops = &kgsl_cma_ops;
+	memdesc->dev = device->dev->parent;
+
+	memdesc->hostptr = dma_alloc_coherent(memdesc->dev, size,
+					&memdesc->physaddr, GFP_KERNEL);
+
+	if (memdesc->hostptr == NULL) {
+		result = -ENOMEM;
+		goto err;
+	}
+
+	result = memdesc_sg_phys(memdesc, memdesc->physaddr, size);
+	if (result)
+		goto err;
+
+	/* Record statistics */
+
+	KGSL_STATS_ADD(size, kgsl_driver.stats.coherent,
+		       kgsl_driver.stats.coherent_max);
+
+err:
+	if (result)
+		kgsl_sharedmem_free(memdesc);
+
+	return result;
+}
+EXPORT_SYMBOL(kgsl_cma_alloc_coherent);
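
kgsl_cma_alloc_coherent() replaces the removed EBI allocator as the contiguous backend for NOMMU operation. A minimal usage sketch; passing a NULL pagetable reflects the NOMMU case and is an assumption here, not something this hunk shows:

	struct kgsl_memdesc md = { 0 };
	int ret;

	/* back the buffer with DMA-coherent (CMA) memory */
	ret = kgsl_cma_alloc_coherent(device, &md, NULL, SZ_64K);
	if (ret == 0)
		kgsl_sharedmem_free(&md);	/* routes to kgsl_cma_coherent_free() */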
diff --git a/drivers/gpu/msm2/kgsl_sharedmem.h b/drivers/gpu/msm2/kgsl_sharedmem.h
index 339575f..8a2e7c4 100644
--- a/drivers/gpu/msm2/kgsl_sharedmem.h
+++ b/drivers/gpu/msm2/kgsl_sharedmem.h
@@ -41,13 +41,9 @@
 
 int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size);
 
-int kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc,
-			     struct kgsl_pagetable *pagetable,
-			     size_t size);
-
-int kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc,
-			struct kgsl_pagetable *pagetable,
-			size_t size);
+int kgsl_cma_alloc_coherent(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc,
+			struct kgsl_pagetable *pagetable, size_t size);
 
 void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc);
 
@@ -141,15 +137,13 @@
 
 static inline void *kgsl_sg_alloc(unsigned int sglen)
 {
+	if ((sglen == 0) || (sglen >= ULONG_MAX / sizeof(struct scatterlist)))
+		return NULL;
+
 	if ((sglen * sizeof(struct scatterlist)) <  PAGE_SIZE)
 		return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL);
-	else {
-		void *ptr = vmalloc(sglen * sizeof(struct scatterlist));
-		if (ptr)
-			memset(ptr, 0, sglen * sizeof(struct scatterlist));
-
-		return ptr;
-	}
+	else
+		return vmalloc(sglen * sizeof(struct scatterlist));
 }
 
 static inline void kgsl_sg_free(void *ptr, unsigned int sglen)
@@ -165,7 +159,7 @@
 		phys_addr_t physaddr, unsigned int size)
 {
 	memdesc->sg = kgsl_sg_alloc(1);
-	if (!memdesc->sg)
+	if (memdesc->sg == NULL)
 		return -ENOMEM;
 
 	kmemleak_not_leak(memdesc->sg);
@@ -255,14 +249,16 @@
 }
 
 static inline int
-kgsl_allocate(struct kgsl_memdesc *memdesc,
+kgsl_allocate(struct kgsl_device *device, struct kgsl_memdesc *memdesc,
 		struct kgsl_pagetable *pagetable, size_t size)
 {
 	int ret;
 	memdesc->priv |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
-	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
-		return kgsl_sharedmem_ebimem(memdesc, pagetable, size);
-
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) {
+		size = ALIGN(size, PAGE_SIZE * 2);
+		return kgsl_cma_alloc_coherent(device, memdesc, pagetable,
+						size);
+	}
 	ret = kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
 	if (ret)
 		return ret;
@@ -278,7 +274,8 @@
 }
 
 static inline int
-kgsl_allocate_user(struct kgsl_memdesc *memdesc,
+kgsl_allocate_user(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc,
 		struct kgsl_pagetable *pagetable,
 		size_t size, unsigned int flags)
 {
@@ -289,8 +286,10 @@
 
 	memdesc->flags = flags;
 
-	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
-		ret = kgsl_sharedmem_ebimem_user(memdesc, pagetable, size);
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE) {
+		size = ALIGN(size, PAGE_SIZE);
+		ret = kgsl_cma_alloc_coherent(device, memdesc, pagetable, size);
+	}
 	else
 		ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size);
 
diff --git a/drivers/gpu/msm2/kgsl_snapshot.c b/drivers/gpu/msm2/kgsl_snapshot.c
index a81e19c..beda17f 100644
--- a/drivers/gpu/msm2/kgsl_snapshot.c
+++ b/drivers/gpu/msm2/kgsl_snapshot.c
@@ -210,10 +210,8 @@
 	header->ctxtcount = ctxtcount;
 
 	_ctxtptr = snapshot + sizeof(*header);
-
 	/* append information for the global context */
 	snapshot_context_info(KGSL_MEMSTORE_GLOBAL, NULL, device);
-
 	/* append information for each context */
 
 	read_lock(&device->context_lock);
diff --git a/drivers/gpu/msm2/kgsl_sync.c b/drivers/gpu/msm2/kgsl_sync.c
index 0e7606e..dd32e14 100644
--- a/drivers/gpu/msm2/kgsl_sync.c
+++ b/drivers/gpu/msm2/kgsl_sync.c
@@ -13,12 +13,9 @@
 
 #include <linux/err.h>
 #include <linux/file.h>
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 
-#include <asm/current.h>
-
 #include "kgsl_sync.h"
 
 struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline,
@@ -90,9 +87,7 @@
 	void *priv, u32 context_id, u32 timestamp, u32 type)
 {
 	struct kgsl_fence_event_priv *ev = priv;
-
-	/* Signal time timeline for every event type */
-	kgsl_sync_timeline_signal(ev->context->timeline, timestamp);
+	kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp);
 	kgsl_context_put(ev->context);
 	kfree(ev);
 }
@@ -130,10 +125,8 @@
 
 	context = kgsl_context_get_owner(owner, context_id);
 
-	if (context == NULL) {
-		kfree(event);
-		return -EINVAL;
-	}
+	if (context == NULL)
+		goto fail_pt;
 
 	event->context = context;
 	event->timestamp = timestamp;
@@ -192,35 +185,6 @@
 	return ret;
 }
 
-static unsigned int kgsl_sync_get_timestamp(
-	struct kgsl_sync_timeline *ktimeline, enum kgsl_timestamp_type type)
-{
-	struct kgsl_context *context = idr_find(&ktimeline->device->context_idr,
-						ktimeline->context_id);
-	if (context == NULL)
-		return 0;
-
-	return kgsl_readtimestamp(ktimeline->device, context, type);
-}
-
-static void kgsl_sync_timeline_value_str(struct sync_timeline *sync_timeline,
-					 char *str, int size)
-{
-	struct kgsl_sync_timeline *ktimeline =
-		(struct kgsl_sync_timeline *) sync_timeline;
-	unsigned int timestamp_retired = kgsl_sync_get_timestamp(ktimeline,
-		KGSL_TIMESTAMP_RETIRED);
-	snprintf(str, size, "%u retired:%u", ktimeline->last_timestamp,
-		timestamp_retired);
-}
-
-static void kgsl_sync_pt_value_str(struct sync_pt *sync_pt,
-				   char *str, int size)
-{
-	struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt;
-	snprintf(str, size, "%u", kpt->timestamp);
-}
-
 static void kgsl_sync_timeline_release_obj(struct sync_timeline *sync_timeline)
 {
 	/*
@@ -235,8 +199,6 @@
 	.dup = kgsl_sync_pt_dup,
 	.has_signaled = kgsl_sync_pt_has_signaled,
 	.compare = kgsl_sync_pt_compare,
-	.timeline_value_str = kgsl_sync_timeline_value_str,
-	.pt_value_str = kgsl_sync_pt_value_str,
 	.release_obj = kgsl_sync_timeline_release_obj,
 };
 
@@ -244,25 +206,13 @@
 {
 	struct kgsl_sync_timeline *ktimeline;
 
-	/* Generate a name which includes the thread name, thread id, process
-	 * name, process id, and context id. This makes it possible to
-	 * identify the context of a timeline in the sync dump. */
-	char ktimeline_name[sizeof(context->timeline->name)] = {};
-	snprintf(ktimeline_name, sizeof(ktimeline_name),
-		"%s_%.15s(%d)-%.15s(%d)-%d",
-		context->device->name,
-		current->group_leader->comm, current->group_leader->pid,
-		current->comm, current->pid, context->id);
-
 	context->timeline = sync_timeline_create(&kgsl_sync_timeline_ops,
-		(int) sizeof(struct kgsl_sync_timeline), ktimeline_name);
+		(int) sizeof(struct kgsl_sync_timeline), "kgsl-timeline");
 	if (context->timeline == NULL)
 		return -EINVAL;
 
 	ktimeline = (struct kgsl_sync_timeline *) context->timeline;
 	ktimeline->last_timestamp = 0;
-	ktimeline->device = context->dev_priv->device;
-	ktimeline->context_id = context->id;
 
 	return 0;
 }
@@ -305,7 +255,7 @@
 		return ERR_PTR(-EINVAL);
 
 	/* create the waiter */
-	kwaiter = kzalloc(sizeof(*kwaiter), GFP_KERNEL);
+	kwaiter = kzalloc(sizeof(*kwaiter), GFP_ATOMIC);
 	if (kwaiter == NULL) {
 		sync_fence_put(fence);
 		return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/msm2/kgsl_sync.h b/drivers/gpu/msm2/kgsl_sync.h
index 275eaf0..2f28b21 100644
--- a/drivers/gpu/msm2/kgsl_sync.h
+++ b/drivers/gpu/msm2/kgsl_sync.h
@@ -19,8 +19,6 @@
 struct kgsl_sync_timeline {
 	struct sync_timeline timeline;
 	unsigned int last_timestamp;
-	struct kgsl_device *device;
-	u32 context_id;
 };
 
 struct kgsl_sync_pt {
diff --git a/drivers/gpu/msm2/kgsl_trace.h b/drivers/gpu/msm2/kgsl_trace.h
index 5f39b8b..b4242d4 100644
--- a/drivers/gpu/msm2/kgsl_trace.h
+++ b/drivers/gpu/msm2/kgsl_trace.h
@@ -676,6 +676,36 @@
 	)
 );
 
+TRACE_EVENT(kgsl_constraint,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int type,
+		unsigned int value, unsigned int on),
+
+	TP_ARGS(device, type, value, on),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, type)
+		__field(unsigned int, value)
+		__field(unsigned int, on)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->type = type;
+		__entry->value = value;
+		__entry->on = on;
+	),
+
+	TP_printk(
+		"d_name=%s constraint_type=%s constraint_value=%u status=%s",
+		__get_str(device_name),
+		__print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES),
+		__entry->value,
+		__entry->on ? "ON" : "OFF"
+	)
+);
+
 TRACE_EVENT(kgsl_mmu_pagefault,
 
 	TP_PROTO(struct kgsl_device *device, unsigned int page,
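
Rendered with the symbolic tables added to kgsl_pwrctrl.h above, a fired event looks roughly like this (device name and values are illustrative):

	kgsl_constraint: d_name=kgsl-3d0 constraint_type=Pwrlevel constraint_value=1 status=ON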
diff --git a/drivers/gpu/msm2/z180.c b/drivers/gpu/msm2/z180.c
index 45f200c..ae7aee0 100644
--- a/drivers/gpu/msm2/z180.c
+++ b/drivers/gpu/msm2/z180.c
@@ -514,7 +514,7 @@
 	z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0);
 error:
 	kgsl_trace_issueibcmds(device, context->id, cmdbatch,
-		*timestamp, cmdbatch->flags, result, 0);
+		*timestamp, cmdbatch ? cmdbatch->flags : 0, result, 0);
 
 	kgsl_active_count_put(device);
 error_active_count:
@@ -614,7 +614,6 @@
 
 	z180_cmdstream_start(device);
 
-	mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
 	device->ftbl->irqctrl(device, 1);
 
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index a36e60d..8d17dc6 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -24,6 +24,7 @@
 
 #define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200
 #define KGSL_CONTEXT_SYNC               0x00000400
+#define KGSL_CONTEXT_PWR_CONSTRAINT     0x00000800
 /* bits [12:15] are reserved for future use */
 #define KGSL_CONTEXT_TYPE_MASK          0x01F00000
 #define KGSL_CONTEXT_TYPE_SHIFT         20
@@ -196,6 +197,7 @@
 	KGSL_PROP_VERSION         = 0x00000008,
 	KGSL_PROP_GPU_RESET_STAT  = 0x00000009,
 	KGSL_PROP_PWRCTRL         = 0x0000000E,
+	KGSL_PROP_PWR_CONSTRAINT  = 0x00000012,
 };
 
 struct kgsl_shadowprop {
@@ -689,7 +691,8 @@
  * struct kgsl_perfcounter_get - argument to IOCTL_KGSL_PERFCOUNTER_GET
  * @groupid: Performance counter group ID
  * @countable: Countable to select within the group
- * @offset: Return offset of the reserved counter
+ * @offset: Return offset of the reserved LO counter
+ * @offset_hi: Return offset of the reserved HI counter
  *
  * Get an available performance counter from a specified groupid.  The offset
  * of the performance counter will be returned after successfully assigning
@@ -704,8 +707,9 @@
 	unsigned int groupid;
 	unsigned int countable;
 	unsigned int offset;
+	unsigned int offset_hi;
 /* private: reserved for future use */
-	unsigned int __pad[2]; /* For future binary compatibility */
+	unsigned int __pad; /* For future binary compatibility */
 };
 
 #define IOCTL_KGSL_PERFCOUNTER_GET \
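
Because offset_hi takes over one of the two previously reserved words, struct kgsl_perfcounter_get keeps its size and the ioctl stays binary-compatible with older callers. A user-space sketch; the group id is illustrative:

	struct kgsl_perfcounter_get get = {
		.groupid = KGSL_PERFCOUNTER_GROUP_SP,	/* illustrative group */
		.countable = 0,
	};

	if (ioctl(fd, IOCTL_KGSL_PERFCOUNTER_GET, &get) == 0) {
		/* sample the 64-bit counter from the register pair at
		 * get.offset (LO) and get.offset_hi (HI) */
	}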
@@ -881,6 +885,34 @@
 #define IOCTL_KGSL_SUBMIT_COMMANDS \
 	_IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands)
 
+/**
+ * struct kgsl_device_constraint - device constraint argument
+ * @type: type of constraint, i.e. pwrlevel/none
+ * @context_id: KGSL context ID
+ * @data: constraint data
+ * @size: size of the constraint data
+ */
+struct kgsl_device_constraint {
+	unsigned int type;
+	unsigned int context_id;
+	void __user *data;
+	size_t size;
+};
+
+/* Constraint type */
+#define KGSL_CONSTRAINT_NONE 0
+#define KGSL_CONSTRAINT_PWRLEVEL 1
+
+/* PWRLEVEL constraint level */
+/* set to min frequency */
+#define KGSL_CONSTRAINT_PWR_MIN    0
+/* set to max frequency */
+#define KGSL_CONSTRAINT_PWR_MAX    1
+
+struct kgsl_device_constraint_pwrlevel {
+	unsigned int level;
+};
+
 #ifdef __KERNEL__
 #ifdef CONFIG_MSM_KGSL_DRM
 int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
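
A user-space sketch of pinning a context to the fastest power level with the new types; delivering it through IOCTL_KGSL_SETPROPERTY with KGSL_PROP_PWR_CONSTRAINT is an assumption, since the property handler is not part of this diff:

	struct kgsl_device_constraint_pwrlevel pwr = { .level = 0 };
	struct kgsl_device_constraint constraint = {
		.type = KGSL_CONSTRAINT_PWRLEVEL,
		.context_id = ctxt_id,
		.data = &pwr,
		.size = sizeof(pwr),
	};
	struct kgsl_device_getproperty prop = {
		.type = KGSL_PROP_PWR_CONSTRAINT,
		.value = &constraint,
		.sizebytes = sizeof(constraint),
	};

	ioctl(fd, IOCTL_KGSL_SETPROPERTY, &prop);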