kgsl and video/msm from msm8960 2.5 branch
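
Sync drivers/gpu/msm with the msm8960 2.5 release branch. This drops the
adreno performance counter infrastructure (the perfcounter get/put/query/read
ioctls, the kernel counter reservation code, and the A3XX select/load register
tables), the shader memory read path, the separate adreno_init() step, and
assorted CFF dump and trace hooks. It restores the two-argument
adreno_start(), explicit GSL_PT_PAGE_* flags on global MMU mappings, the
"qcom,iommu-ctx-sids" DT property for IOMMU context IDs, and the single
A3XX_RBBM_CLOCK_CTL_DEFAULT value.
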
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index 3441afa..fec5363 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Iinclude/uapi/drm -Iinclude/drm -Idrivers/gpu/msm
+ccflags-y := -Iinclude/drm -Idrivers/gpu/msm
 
 msm_kgsl_core-y = \
 	kgsl.o \
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index a2f0e60..be9f3ac 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -66,103 +66,14 @@
 #define A3XX_RBBM_INT_0_MASK 0x063
 #define A3XX_RBBM_INT_0_STATUS 0x064
 #define A3XX_RBBM_PERFCTR_CTL 0x80
-#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
-#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
-#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
-#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
-#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
-#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
 #define A3XX_RBBM_GPU_BUSY_MASKED 0x88
-#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
-#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
-#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
-#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
-#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
-#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
-#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
-#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
-#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
-#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
-#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
-#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
-#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
-#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
-#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
-#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
-#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
-#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
-#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
-#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
-#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
-#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
-#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
-#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
-#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
-#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
-#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
-#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
-#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
-#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
-#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
-#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
-#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
-#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
-#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
-#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
-#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
-#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
-#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
-#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
-#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
-#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
-#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
-#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
-#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
-#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
-#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
-#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
-#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
-#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
-#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
-#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
-#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
-#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
-#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
-#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
-#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
-#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
-#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
-#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
-#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
-#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
-#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
-#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
-#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
-#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
-#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
-#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
-#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
-#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
-#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
-#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
-#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
-#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
-#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
-#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
 #define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
 #define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
 #define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
 #define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
 #define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
 #define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
-#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
-#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
-#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
-#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
-
 #define A3XX_RBBM_RBBM_CTL 0x100
-#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
-#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
 #define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
 #define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
 #define A3XX_RBBM_DEBUG_BUS_CTL             0x111
@@ -178,7 +90,6 @@
 #define A3XX_CP_MERCIU_DATA2 0x1D3
 #define A3XX_CP_MEQ_ADDR 0x1DA
 #define A3XX_CP_MEQ_DATA 0x1DB
-#define A3XX_CP_PERFCOUNTER_SELECT 0x445
 #define A3XX_CP_HW_FAULT  0x45C
 #define A3XX_CP_AHB_FAULT 0x54D
 #define A3XX_CP_PROTECT_CTRL 0x45E
@@ -227,14 +138,6 @@
 #define A3XX_VSC_PIPE_CONFIG_7 0xC1B
 #define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
 #define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
-#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
-#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
-#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
-#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
-#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
-#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
-#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
-#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
 #define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
 #define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
 #define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
@@ -260,42 +163,14 @@
 #define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
 #define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
 #define A3XX_RB_GMEM_BASE_ADDR 0xCC0
-#define A3XX_RB_PERFCOUNTER0_SELECT   0xCC6
-#define A3XX_RB_PERFCOUNTER1_SELECT   0xCC7
-#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
-#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
-#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
-#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
-#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
-#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
 #define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
-#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
 #define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
 #define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
-#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
-#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
 #define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
-#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
-#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
-#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
-#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
-#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
-#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
 #define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
-#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
-#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
-#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
-#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
-#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
 #define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
 #define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
 #define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
-#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
-#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
-#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
-#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
-#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
-#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
 #define A3XX_GRAS_CL_CLIP_CNTL 0x2040
 #define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
 #define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
@@ -357,14 +232,12 @@
 #define A3XX_SP_VS_OUT_REG_7 0x22CE
 #define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
 #define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
-#define A3XX_SP_VS_OBJ_START_REG 0x22D5
 #define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
 #define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
 #define A3XX_SP_VS_LENGTH_REG 0x22DF
 #define A3XX_SP_FS_CTRL_REG0 0x22E0
 #define A3XX_SP_FS_CTRL_REG1 0x22E1
 #define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
-#define A3XX_SP_FS_OBJ_START_REG 0x22E3
 #define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
 #define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
 #define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
@@ -398,10 +271,8 @@
 #define A3XX_VBIF_OUT_AXI_AOOO 0x305F
 
 /* Bit flags for RBBM_CTL */
-#define RBBM_RBBM_CTL_RESET_PWR_CTR0  BIT(0)
-#define RBBM_RBBM_CTL_RESET_PWR_CTR1  BIT(1)
-#define RBBM_RBBM_CTL_ENABLE_PWR_CTR0  BIT(16)
-#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1  BIT(17)
+#define RBBM_RBBM_CTL_RESET_PWR_CTR1  (1 << 1)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1  (1 << 17)
 
 /* Various flags used by the context switch code */
 
@@ -666,15 +537,7 @@
 #define RBBM_BLOCK_ID_MARB_3           0x2b
 
 /* RBBM_CLOCK_CTL default value */
-#define A305_RBBM_CLOCK_CTL_DEFAULT   0xAAAAAAAA
-#define A305C_RBBM_CLOCK_CTL_DEFAULT  0xAAAAAAAA
-#define A320_RBBM_CLOCK_CTL_DEFAULT   0xBFFFFFFF
-#define A330_RBBM_CLOCK_CTL_DEFAULT   0xBFFCFFFF
-#define A330v2_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF
-#define A305B_RBBM_CLOCK_CTL_DEFAULT  0xAAAAAAAA
-
-#define A330_RBBM_GPR0_CTL_DEFAULT    0x00000000
-#define A330v2_RBBM_GPR0_CTL_DEFAULT  0x00000000
+#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF
 
 /* COUNTABLE FOR SP PERFCOUNTER */
 #define SP_FS_FULL_ALU_INSTRUCTIONS    0x0E
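
Note on the RBBM_CTL bits kept above: the reset bit is pulsed before the
enable bit so that power counter 1 starts counting from zero. A minimal sketch
of that sequence, modeled on the a3xx_perfcounter_enable_pwr() code removed
from adreno_a3xx.c further down (the function name here is hypothetical;
adreno_regread/adreno_regwrite are the accessors declared in adreno.h):

	/* Sketch: arm RBBM power counter 1 - reset, then enable. */
	static void a3xx_arm_pwr_ctr1(struct kgsl_device *device)
	{
		unsigned int val;

		adreno_regread(device, A3XX_RBBM_RBBM_CTL, &val);
		adreno_regwrite(device, A3XX_RBBM_RBBM_CTL,
				val | RBBM_RBBM_CTL_RESET_PWR_CTR1);
		adreno_regwrite(device, A3XX_RBBM_RBBM_CTL,
				val | RBBM_RBBM_CTL_ENABLE_PWR_CTR1);
	}
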
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 62b6a71..b49261c 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -17,6 +17,7 @@
 #include <linux/sched.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
+#include <linux/msm_kgsl.h>
 #include <linux/delay.h>
 
 #include <mach/socinfo.h>
@@ -30,7 +31,6 @@
 #include "kgsl_cffdump.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_iommu.h"
-#include "kgsl_trace.h"
 
 #include "adreno.h"
 #include "adreno_pm4types.h"
@@ -100,7 +100,6 @@
 			.irq_name = KGSL_3D0_IRQ,
 		},
 		.iomemname = KGSL_3D0_REG_MEMORY,
-		.shadermemname = KGSL_3D0_SHADER_MEMORY,
 		.ftbl = &adreno_functable,
 #ifdef CONFIG_HAS_EARLYSUSPEND
 		.display_off = {
@@ -168,10 +167,10 @@
 	/* size of gmem for gpu*/
 	unsigned int gmem_size;
 	/* version of pm4 microcode that supports sync_lock
-	   between CPU and GPU for IOMMU-v0 programming */
+	   between CPU and GPU for SMMU-v1 programming */
 	unsigned int sync_lock_pm4_ver;
 	/* version of pfp microcode that supports sync_lock
-	   between CPU and GPU for IOMMU-v0 programming */
+	   between CPU and GPU for SMMU-v1 programming */
 	unsigned int sync_lock_pfp_ver;
 } adreno_gpulist[] = {
 	{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
@@ -200,331 +199,18 @@
 		"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
 		1536, 768, 3, SZ_512K, 0x225011, 0x225002 },
 	/* A3XX doesn't use the pix_shader_start */
-	{ ADRENO_REV_A305, 3, 0, 5, 0,
+	{ ADRENO_REV_A305, 3, 0, 5, ANY_ID,
 		"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
 		512, 0, 2, SZ_256K, 0x3FF037, 0x3FF016 },
 	/* A3XX doesn't use the pix_shader_start */
 	{ ADRENO_REV_A320, 3, 2, ANY_ID, ANY_ID,
 		"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
 		512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
-	{ ADRENO_REV_A330, 3, 3, 0, ANY_ID,
+	{ ADRENO_REV_A330, 3, 3, 0, 0,
 		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
 		512, 0, 2, SZ_1M, NO_VER, NO_VER },
-	{ ADRENO_REV_A305B, 3, 0, 5, 0x10,
-		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
-		512, 0, 2, SZ_128K, NO_VER, NO_VER },
-	{ ADRENO_REV_A305C, 3, 0, 5, 0x20,
-		"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
-		512, 0, 2, SZ_128K, 0x3FF037, 0x3FF016 },
 };
 
-/**
- * adreno_perfcounter_init: Reserve kernel performance counters
- * @device: device to configure
- *
- * The kernel needs/wants a certain group of performance counters for
- * its own activities.  Reserve these performance counters at init time
- * to ensure that they are always reserved for the kernel.  The performance
- * counters used by the kernel can be obtained by the user, but these
- * performance counters will remain active as long as the device is alive.
- */
-
-static void adreno_perfcounter_init(struct kgsl_device *device)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
-	if (adreno_dev->gpudev->perfcounter_init)
-		adreno_dev->gpudev->perfcounter_init(adreno_dev);
-};
-
-/**
- * adreno_perfcounter_start: Enable performance counters
- * @adreno_dev: Adreno device to configure
- *
- * Ensure all performance counters are enabled that are allocated.  Since
- * the device was most likely stopped, we can't trust that the counters
- * are still valid so make it so.
- */
-
-static void adreno_perfcounter_start(struct adreno_device *adreno_dev)
-{
-	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
-	struct adreno_perfcount_group *group;
-	unsigned int i, j;
-
-	/* group id iter */
-	for (i = 0; i < counters->group_count; i++) {
-		group = &(counters->groups[i]);
-
-		/* countable iter */
-		for (j = 0; j < group->reg_count; j++) {
-			if (group->regs[j].countable ==
-					KGSL_PERFCOUNTER_NOT_USED)
-				continue;
-
-			if (adreno_dev->gpudev->perfcounter_enable)
-				adreno_dev->gpudev->perfcounter_enable(
-					adreno_dev, i, j,
-					group->regs[j].countable);
-		}
-	}
-}
-
-/**
- * adreno_perfcounter_read_group: Determine which countables are in counters
- * @adreno_dev: Adreno device to configure
- * @reads: List of kgsl_perfcounter_read_groups
- * @count: Length of list
- *
- * Read the performance counters for the groupid/countable pairs and return
- * the 64 bit result for each pair
- */
-
-int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
-	struct kgsl_perfcounter_read_group *reads, unsigned int count)
-{
-	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
-	struct adreno_perfcount_group *group;
-	struct kgsl_perfcounter_read_group *list = NULL;
-	unsigned int i, j;
-	int ret = 0;
-
-	/* perfcounter get/put/query/read not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
-		return -EINVAL;
-
-	/* sanity check for later */
-	if (!adreno_dev->gpudev->perfcounter_read)
-		return -EINVAL;
-
-	/* sanity check params passed in */
-	if (reads == NULL || count == 0 || count > 100)
-		return -EINVAL;
-
-	/* verify valid inputs group ids and countables */
-	for (i = 0; i < count; i++) {
-		if (reads[i].groupid >= counters->group_count)
-			return -EINVAL;
-	}
-
-	list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count,
-			GFP_KERNEL);
-	if (!list)
-		return -ENOMEM;
-
-	if (copy_from_user(list, reads,
-			sizeof(struct kgsl_perfcounter_read_group) * count)) {
-		ret = -EFAULT;
-		goto done;
-	}
-
-	/* list iterator */
-	for (j = 0; j < count; j++) {
-		list[j].value = 0;
-
-		group = &(counters->groups[list[j].groupid]);
-
-		/* group/counter iterator */
-		for (i = 0; i < group->reg_count; i++) {
-			if (group->regs[i].countable == list[j].countable) {
-				list[j].value =
-					adreno_dev->gpudev->perfcounter_read(
-					adreno_dev, list[j].groupid,
-					i, group->regs[i].offset);
-				break;
-			}
-		}
-	}
-
-	/* write the data */
-	if (copy_to_user(reads, list,
-			sizeof(struct kgsl_perfcounter_read_group) *
-			count) != 0)
-		ret = -EFAULT;
-
-done:
-	kfree(list);
-	return ret;
-}
-
-/**
- * adreno_perfcounter_query_group: Determine which countables are in counters
- * @adreno_dev: Adreno device to configure
- * @groupid: Desired performance counter group
- * @countables: Return list of all countables in the groups counters
- * @count: Max length of the array
- * @max_counters: max counters for the groupid
- *
- * Query the current state of counters for the group.
- */
-
-int adreno_perfcounter_query_group(struct adreno_device *adreno_dev,
-	unsigned int groupid, unsigned int *countables, unsigned int count,
-	unsigned int *max_counters)
-{
-	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
-	struct adreno_perfcount_group *group;
-	unsigned int i;
-
-	*max_counters = 0;
-
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
-		return -EINVAL;
-
-	if (groupid >= counters->group_count)
-		return -EINVAL;
-
-	group = &(counters->groups[groupid]);
-	*max_counters = group->reg_count;
-
-	/*
-	 * if NULL countable or *count of zero, return max reg_count in
-	 * *max_counters and return success
-	 */
-	if (countables == NULL || count == 0)
-		return 0;
-
-	/*
-	 * Go through all available counters.  Write upto *count * countable
-	 * values.
-	 */
-	for (i = 0; i < group->reg_count && i < count; i++) {
-		if (copy_to_user(&countables[i], &(group->regs[i].countable),
-				sizeof(unsigned int)) != 0)
-			return -EFAULT;
-	}
-
-	return 0;
-}
-
-/**
- * adreno_perfcounter_get: Try to put a countable in an available counter
- * @adreno_dev: Adreno device to configure
- * @groupid: Desired performance counter group
- * @countable: Countable desired to be in a counter
- * @offset: Return offset of the countable
- * @flags: Used to setup kernel perf counters
- *
- * Try to place a countable in an available counter.  If the countable is
- * already in a counter, reference count the counter/countable pair resource
- * and return success
- */
-
-int adreno_perfcounter_get(struct adreno_device *adreno_dev,
-	unsigned int groupid, unsigned int countable, unsigned int *offset,
-	unsigned int flags)
-{
-	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
-	struct adreno_perfcount_group *group;
-	unsigned int i, empty = -1;
-
-	/* always clear return variables */
-	if (offset)
-		*offset = 0;
-
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
-		return -EINVAL;
-
-	if (groupid >= counters->group_count)
-		return -EINVAL;
-
-	group = &(counters->groups[groupid]);
-
-	/*
-	 * Check if the countable is already associated with a counter.
-	 * Refcount and return the offset, otherwise, try and find an empty
-	 * counter and assign the countable to it.
-	 */
-	for (i = 0; i < group->reg_count; i++) {
-		if (group->regs[i].countable == countable) {
-			/* Countable already associated with counter */
-			group->regs[i].refcount++;
-			group->regs[i].flags |= flags;
-			if (offset)
-				*offset = group->regs[i].offset;
-			return 0;
-		} else if (group->regs[i].countable ==
-			KGSL_PERFCOUNTER_NOT_USED) {
-			/* keep track of unused counter */
-			empty = i;
-		}
-	}
-
-	/* no available counters, so do nothing else */
-	if (empty == -1)
-		return -EBUSY;
-
-	/* initialize the new counter */
-	group->regs[empty].countable = countable;
-	group->regs[empty].refcount = 1;
-
-	/* enable the new counter */
-	adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty,
-		countable);
-
-	group->regs[empty].flags = flags;
-
-	if (offset)
-		*offset = group->regs[empty].offset;
-
-	return 0;
-}
-
-
-/**
- * adreno_perfcounter_put: Release a countable from counter resource
- * @adreno_dev: Adreno device to configure
- * @groupid: Desired performance counter group
- * @countable: Countable desired to be freed from a  counter
- *
- * Put a performance counter/countable pair that was previously received.  If
- * noone else is using the countable, free up the counter for others.
- */
-int adreno_perfcounter_put(struct adreno_device *adreno_dev,
-	unsigned int groupid, unsigned int countable)
-{
-	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
-	struct adreno_perfcount_group *group;
-
-	unsigned int i;
-
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
-		return -EINVAL;
-
-	if (groupid >= counters->group_count)
-		return -EINVAL;
-
-	group = &(counters->groups[groupid]);
-
-	for (i = 0; i < group->reg_count; i++) {
-		if (group->regs[i].countable == countable) {
-			if (group->regs[i].refcount > 0) {
-				group->regs[i].refcount--;
-
-				/*
-				 * book keeping to ensure we never free a
-				 * perf counter used by kernel
-				 */
-				if (group->regs[i].flags &&
-					group->regs[i].refcount == 0)
-					group->regs[i].refcount++;
-
-				/* make available if not used */
-				if (group->regs[i].refcount == 0)
-					group->regs[i].countable =
-						KGSL_PERFCOUNTER_NOT_USED;
-			}
-
-			return 0;
-		}
-	}
-
-	return -EINVAL;
-}
-
 static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
 {
 	irqreturn_t result;
@@ -569,29 +255,26 @@
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 
-	result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc);
+	result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc,
+				     GSL_PT_PAGE_RV);
 	if (result)
 		goto error;
 
-	result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc);
+	result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 	if (result)
 		goto unmap_buffer_desc;
 
-	result = kgsl_mmu_map_global(pagetable, &device->memstore);
+	result = kgsl_mmu_map_global(pagetable, &device->memstore,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 	if (result)
 		goto unmap_memptrs_desc;
 
-	result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory);
+	result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 	if (result)
 		goto unmap_memstore_desc;
 
-	/*
-	 * Set the mpu end to the last "normal" global memory we use.
-	 * For the IOMMU, this will be used to restrict access to the
-	 * mapped registers.
-	 */
-	device->mh.mpu_range = device->mmu.setstate_memory.gpuaddr +
-				device->mmu.setstate_memory.size;
 	return result;
 
 unmap_memstore_desc:
@@ -612,7 +295,7 @@
 					uint32_t flags)
 {
 	unsigned int pt_val, reg_pt_val;
-	unsigned int link[230];
+	unsigned int link[250];
 	unsigned int *cmds = &link[0];
 	int sizedwords = 0;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -620,14 +303,8 @@
 	struct kgsl_context *context;
 	struct adreno_context *adreno_ctx = NULL;
 
-	/*
-	 * If we're idle and we don't need to use the GPU to save context
-	 * state, use the CPU instead of the GPU to reprogram the
-	 * iommu for simplicity's sake.
-	 */
-	 if (!adreno_dev->drawctxt_active || device->ftbl->isidle(device))
+	if (!adreno_dev->drawctxt_active)
 		return kgsl_mmu_device_setstate(&device->mmu, flags);
-
 	num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
 
 	context = idr_find(&device->context_idr, context_id);
@@ -751,7 +428,7 @@
 		adreno_dev->ringbuffer.timestamp[KGSL_MEMSTORE_GLOBAL], true);
 	}
 
-	if (sizedwords > (ARRAY_SIZE(link))) {
+	if (sizedwords > (sizeof(link)/sizeof(unsigned int))) {
 		KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
 		BUG();
 	}
@@ -929,7 +606,7 @@
 	/* 8x25 returns 0 for minor id, but it should be 1 */
 	if (cpu_is_qsd8x50())
 		patchid = 1;
-	else if ((cpu_is_msm8625() || cpu_is_msm8625q()) && minorid == 0)
+	else if (cpu_is_msm8625() && minorid == 0)
 		minorid = 1;
 
 	chipid |= (minorid << 8) | patchid;
@@ -993,6 +670,7 @@
 	adreno_dev->instruction_size = adreno_gpulist[i].instruction_size;
 	adreno_dev->gmem_size = adreno_gpulist[i].gmem_size;
 	adreno_dev->gpulist_index = i;
+
 }
 
 static struct platform_device_id adreno_id_table[] = {
@@ -1079,10 +757,6 @@
 		&pdata->init_level))
 		pdata->init_level = 1;
 
-	if (adreno_of_read_property(parent, "qcom,step-pwrlevel",
-		&pdata->step_mul))
-		pdata->step_mul = 1;
-
 	if (pdata->init_level < 0 || pdata->init_level > pdata->num_levels) {
 		KGSL_CORE_ERR("Initial power level out of range\n");
 		pdata->init_level = 1;
@@ -1308,17 +982,9 @@
 			goto err;
 		}
 
-		ret = of_property_read_u32_array(child, "reg", reg_val, 2);
-		if (ret) {
-			KGSL_CORE_ERR("Unable to read KGSL IOMMU 'reg'\n");
+		if (adreno_of_read_property(child, "qcom,iommu-ctx-sids",
+			&ctxs[ctx_index].ctx_id))
 			goto err;
-		}
-		if (msm_soc_version_supports_iommu_v0())
-			ctxs[ctx_index].ctx_id = (reg_val[0] -
-				data->physstart) >> KGSL_IOMMU_CTX_SHIFT;
-		else
-			ctxs[ctx_index].ctx_id = ((reg_val[0] -
-				data->physstart) >> KGSL_IOMMU_CTX_SHIFT) - 8;
 
 		ctx_index++;
 	}
@@ -1373,17 +1039,15 @@
 	if (ret)
 		goto err;
 
+	/* Default to 83 if "qcom,idle-timeout" is absent from the DT */
 	if (adreno_of_read_property(pdev->dev.of_node, "qcom,idle-timeout",
 		&pdata->idle_timeout))
-		pdata->idle_timeout = HZ/12;
+		pdata->idle_timeout = 83;
 
 	if (adreno_of_read_property(pdev->dev.of_node, "qcom,nap-allowed",
 		&pdata->nap_allowed))
 		pdata->nap_allowed = 1;
 
-	pdata->strtstp_sleepwake = of_property_read_bool(pdev->dev.of_node,
-						"qcom,strtstp-sleepwake");
-
 	if (adreno_of_read_property(pdev->dev.of_node, "qcom,clk-map",
 		&pdata->clk_map))
 		goto err;
@@ -1435,8 +1099,7 @@
 static int
 adreno_ocmem_gmem_malloc(struct adreno_device *adreno_dev)
 {
-	if (!(adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)))
+	if (!adreno_is_a330(adreno_dev))
 		return 0;
 
 	/* OCMEM is only needed once, do not support consective allocation */
@@ -1457,8 +1120,7 @@
 static void
 adreno_ocmem_gmem_free(struct adreno_device *adreno_dev)
 {
-	if (!(adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)))
+	if (!adreno_is_a330(adreno_dev))
 		return;
 
 	if (adreno_dev->ocmem_hdl == NULL)
@@ -1541,10 +1203,10 @@
 	return 0;
 }
 
-static int adreno_init(struct kgsl_device *device)
+static int adreno_start(struct kgsl_device *device, unsigned int init_ram)
 {
+	int status = -EINVAL;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 
 	if (KGSL_STATE_DUMP_AND_FT != device->state)
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
@@ -1570,9 +1232,10 @@
 	if (adreno_dev->gpurev == ADRENO_REV_UNKNOWN) {
 		KGSL_DRV_ERR(device, "Unknown chip ID %x\n",
 			adreno_dev->chip_id);
-		BUG_ON(1);
+		goto error_clk_off;
 	}
 
+
 	/*
 	 * Check if firmware supports the sync lock PM4 packets needed
 	 * for IOMMUv1
@@ -1584,34 +1247,7 @@
 		adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver))
 		device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC;
 
-	rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0;
-
-	/* Assign correct RBBM status register to hang detect regs
-	 */
-	ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
-
-	adreno_perfcounter_init(device);
-
-	/* Power down the device */
-	kgsl_pwrctrl_disable(device);
-
-	return 0;
-}
-
-static int adreno_start(struct kgsl_device *device)
-{
-	int status = -EINVAL;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
-	kgsl_cffdump_open(device);
-
-	if (KGSL_STATE_DUMP_AND_FT != device->state)
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
-
-	/* Power up the device */
-	kgsl_pwrctrl_enable(device);
-
-	/* Set up a2xx special case */
+	/* Set up the MMU */
 	if (adreno_is_a2xx(adreno_dev)) {
 		/*
 		 * the MH_CLNT_INTF_CTRL_CONFIG registers aren't present
@@ -1625,6 +1261,20 @@
 		kgsl_mh_start(device);
 	}
 
+	/* Assign correct RBBM status register to hang detect regs
+	 */
+	ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
+
+	/* Add A3XX specific registers for hang detection */
+	if (adreno_is_a3xx(adreno_dev)) {
+		ft_detect_regs[6] = A3XX_RBBM_PERFCTR_SP_7_LO;
+		ft_detect_regs[7] = A3XX_RBBM_PERFCTR_SP_7_HI;
+		ft_detect_regs[8] = A3XX_RBBM_PERFCTR_SP_6_LO;
+		ft_detect_regs[9] = A3XX_RBBM_PERFCTR_SP_6_HI;
+		ft_detect_regs[10] = A3XX_RBBM_PERFCTR_SP_5_LO;
+		ft_detect_regs[11] = A3XX_RBBM_PERFCTR_SP_5_HI;
+	}
+
 	status = kgsl_mmu_start(device);
 	if (status)
 		goto error_clk_off;
@@ -1641,30 +1291,22 @@
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
 	device->ftbl->irqctrl(device, 1);
 
-	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
-	if (status)
-		goto error_irq_off;
+	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram);
+	if (status == 0) {
+		/* While fault tolerance is on we do not want timer to
+		 * fire and attempt to change any device state */
+		if (KGSL_STATE_DUMP_AND_FT != device->state)
+			mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
+		return 0;
+	}
 
-	/* While fault tolerance is on we do not want timer to
-	 * fire and attempt to change any device state */
-	if (KGSL_STATE_DUMP_AND_FT != device->state)
-		mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
-
-	adreno_perfcounter_start(adreno_dev);
-
-	device->reset_counter++;
-
-	return 0;
-
-error_irq_off:
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 
 error_mmu_off:
 	kgsl_mmu_stop(&device->mmu);
 
 error_clk_off:
-	if (KGSL_STATE_DUMP_AND_FT != device->state)
-		kgsl_pwrctrl_disable(device);
+	kgsl_pwrctrl_disable(device);
 
 	return status;
 }
@@ -1688,8 +1330,6 @@
 	/* Power down the device */
 	kgsl_pwrctrl_disable(device);
 
-	kgsl_cffdump_close(device->id);
-
 	return 0;
 }
 
@@ -1968,6 +1608,7 @@
 				rb->buffer_desc.size);
 		return;
 	}
+
 	ft_data->status = 0;
 
 	/* find the start of bad command sequence in rb */
@@ -1984,8 +1625,8 @@
 
 	ft_data->ft_policy = adreno_dev->ft_policy;
 
-	if (!ft_data->ft_policy)
-		ft_data->ft_policy = KGSL_FT_DEFAULT_POLICY;
+	if (!adreno_dev->ft_policy)
+		adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
 
 	ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
 					ft_data->global_eop + 1, false);
@@ -2005,7 +1646,6 @@
 			if (ret) {
 				KGSL_FT_ERR(device,
 				"Start not found for replay IB sequence\n");
-				ret = 0;
 				return;
 			}
 			ft_data->start_of_replay_cmds = rb_rptr;
@@ -2044,7 +1684,7 @@
 
 static int
 _adreno_ft_restart_device(struct kgsl_device *device,
-			   struct kgsl_context *context)
+		   struct kgsl_context *context)
 {
 
 	struct adreno_context *adreno_context = context->devctxt;
@@ -2055,12 +1695,7 @@
 		return 1;
 	}
 
-	if (adreno_init(device)) {
-		KGSL_FT_ERR(device, "Device init failed\n");
-		return 1;
-	}
-
-	if (adreno_start(device)) {
+	if (adreno_start(device, true)) {
 		KGSL_FT_ERR(device, "Device start failed\n");
 		return 1;
 	}
@@ -2156,7 +1791,6 @@
 	return ret;
 }
 
-
 static int
 _adreno_ft(struct kgsl_device *device,
 			struct adreno_ft_data *ft_data)
@@ -2184,6 +1818,7 @@
 		context->wait_on_invalid_ts = false;
 
 		if (!(adreno_context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
+			ft_data->status = 1;
 			KGSL_FT_ERR(device, "Fault tolerance not supported\n");
 			goto play_good_cmds;
 		}
@@ -2201,7 +1836,8 @@
 
 	}
 
-	/* Check if we detected a long running IB, if false return */
+	/* Check if we detected a long running IB,
+	 * if true do not attempt replay of bad cmds */
 	if (adreno_dev->long_ib) {
 		long_ib = _adreno_check_long_ib(device);
 		if (!long_ib) {
@@ -2218,6 +1854,7 @@
 
 	/* If long IB detected do not attempt replay of bad cmds */
 	if (long_ib) {
+		ft_data->status = 1;
 		_adreno_debug_ft_info(device, ft_data);
 		goto play_good_cmds;
 	}
@@ -2450,11 +2087,6 @@
 
 		/* Get the fault tolerance data as soon as hang is detected */
 		adreno_setup_ft_data(device, &ft_data);
-		/*
-		 * Trigger an automatic dump of the state to
-		 * the console
-		 */
-		kgsl_postmortem_dump(device, 0);
 
 		/*
 		 * If long ib is detected, do not attempt postmortem or
@@ -2881,23 +2513,12 @@
 	return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL;
 }
 
-/**
- * adreno_read - General read function to read adreno device memory
- * @device - Pointer to the GPU device struct (for adreno device)
- * @base - Base address (kernel virtual) where the device memory is mapped
- * @offsetwords - Offset in words from the base address, of the memory that
- * is to be read
- * @value - Value read from the device memory
- * @mem_len - Length of the device memory mapped to the kernel
- */
-static void adreno_read(struct kgsl_device *device, void *base,
-		unsigned int offsetwords, unsigned int *value,
-		unsigned int mem_len)
+void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
+				unsigned int *value)
 {
-
 	unsigned int *reg;
-	BUG_ON(offsetwords*sizeof(uint32_t) >= mem_len);
-	reg = (unsigned int *)(base + (offsetwords << 2));
+	BUG_ON(offsetwords*sizeof(uint32_t) >= device->reg_len);
+	reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
 
 	if (!in_interrupt())
 		kgsl_pre_hwaccess(device);
@@ -2908,31 +2529,6 @@
 	rmb();
 }
 
-/**
- * adreno_regread - Used to read adreno device registers
- * @offsetwords - Word (4 Bytes) offset to the register to be read
- * @value - Value read from device register
- */
-void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
-	unsigned int *value)
-{
-	adreno_read(device, device->reg_virt, offsetwords, value,
-						device->reg_len);
-}
-
-/**
- * adreno_shadermem_regread - Used to read GPU (adreno) shader memory
- * @device - GPU device whose shader memory is to be read
- * @offsetwords - Offset in words, of the shader memory address to be read
- * @value - Pointer to where the read shader mem value is to be stored
- */
-void adreno_shadermem_regread(struct kgsl_device *device,
-	unsigned int offsetwords, unsigned int *value)
-{
-	adreno_read(device, device->shader_mem_virt, offsetwords, value,
-					device->shader_mem_len);
-}
-
 void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
 				unsigned int value)
 {
@@ -2943,8 +2539,6 @@
 	if (!in_interrupt())
 		kgsl_pre_hwaccess(device);
 
-	trace_kgsl_regwrite(device, offsetwords, value);
-
 	kgsl_cffdump_regwrite(device->id, offsetwords << 2, value);
 	reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
 
@@ -3019,7 +2613,6 @@
 		kgsl_sharedmem_writel(&device->memstore,
 				KGSL_MEMSTORE_OFFSET(context_id,
 					ts_cmp_enable), enableflag);
-
 		/* Make sure the memstore write gets posted */
 		wmb();
 
@@ -3029,10 +2622,9 @@
 		 * get an interrupt
 		 */
 
-		if (context && device->state != KGSL_STATE_SLUMBER) {
+		if (context && device->state != KGSL_STATE_SLUMBER)
 			adreno_ringbuffer_issuecmds(device, context->devctxt,
 					KGSL_CMD_FLAGS_NONE, NULL, 0);
-		}
 	}
 
 	return 0;
@@ -3096,13 +2688,10 @@
 	if (!adreno_dev->long_ib_detect)
 		long_ib_detected = 0;
 
-	if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED))
-		return 0;
-
 	if (is_adreno_rbbm_status_idle(device)) {
 
 		/*
-		 * On A2XX if the RPTR != WPTR and the device is idle, then
+		 * On A20X if the RPTR != WPTR and the device is idle, then
 		 * the last write to WPTR probably failed to latch so write it
 		 * again
 		 */
@@ -3143,7 +2732,7 @@
 			&curr_global_ts,
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 			eoptimestamp));
-	/* Make sure the memstore read has posted */
+
 	mb();
 
 	if (curr_global_ts == prev_global_ts) {
@@ -3154,8 +2743,6 @@
 				&curr_context_id,
 				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 				current_context));
-			/* Make sure the memstore read has posted */
-			mb();
 			context = idr_find(&device->context_idr,
 				curr_context_id);
 			if (context != NULL) {
@@ -3167,6 +2754,8 @@
 			}
 		}
 
+		mb();
+
 		if (curr_context != NULL) {
 
 			curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART;
@@ -3275,8 +2864,6 @@
 	ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
 
 	adreno_regread(device, REG_CP_RB_RPTR, &rptr);
-
-	/* Make sure timestamp check finished before triggering a hang */
 	mb();
 
 	KGSL_DRV_WARN(device,
@@ -3504,8 +3091,7 @@
 		break;
 	}
 	case KGSL_TIMESTAMP_CONSUMED:
-		kgsl_sharedmem_readl(&device->memstore, &timestamp,
-			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp));
+		adreno_regread(device, REG_CP_TIMESTAMP, &timestamp);
 		break;
 	case KGSL_TIMESTAMP_RETIRED:
 		kgsl_sharedmem_readl(&device->memstore, &timestamp,
@@ -3521,55 +3107,27 @@
 static long adreno_ioctl(struct kgsl_device_private *dev_priv,
 			      unsigned int cmd, void *data)
 {
-	struct kgsl_device *device = dev_priv->device;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int result = 0;
+	struct kgsl_drawctxt_set_bin_base_offset *binbase;
+	struct kgsl_context *context;
 
 	switch (cmd) {
-	case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET: {
-		struct kgsl_drawctxt_set_bin_base_offset *binbase = data;
-		struct kgsl_context *context;
-
+	case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET:
 		binbase = data;
 
 		context = kgsl_find_context(dev_priv, binbase->drawctxt_id);
 		if (context) {
 			adreno_drawctxt_set_bin_base_offset(
-				device, context, binbase->offset);
+				dev_priv->device, context, binbase->offset);
 		} else {
 			result = -EINVAL;
-			KGSL_DRV_ERR(device,
+			KGSL_DRV_ERR(dev_priv->device,
 				"invalid drawctxt drawctxt_id %d "
 				"device_id=%d\n",
-				binbase->drawctxt_id, device->id);
+				binbase->drawctxt_id, dev_priv->device->id);
 		}
 		break;
-	}
-	case IOCTL_KGSL_PERFCOUNTER_GET: {
-		struct kgsl_perfcounter_get *get = data;
-		result = adreno_perfcounter_get(adreno_dev, get->groupid,
-			get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
-		break;
-	}
-	case IOCTL_KGSL_PERFCOUNTER_PUT: {
-		struct kgsl_perfcounter_put *put = data;
-		result = adreno_perfcounter_put(adreno_dev, put->groupid,
-			put->countable);
-		break;
-	}
-	case IOCTL_KGSL_PERFCOUNTER_QUERY: {
-		struct kgsl_perfcounter_query *query = data;
-		result = adreno_perfcounter_query_group(adreno_dev,
-			query->groupid, query->countables,
-			query->count, &query->max_counters);
-		break;
-	}
-	case IOCTL_KGSL_PERFCOUNTER_READ: {
-		struct kgsl_perfcounter_read *read = data;
-		result = adreno_perfcounter_read_group(adreno_dev,
-			read->reads, read->count);
-		break;
-	}
+
 	default:
 		KGSL_DRV_INFO(dev_priv->device,
 			"invalid ioctl code %08x\n", cmd);
@@ -3646,7 +3204,6 @@
 	.idle = adreno_idle,
 	.isidle = adreno_isidle,
 	.suspend_context = adreno_suspend_context,
-	.init = adreno_init,
 	.start = adreno_start,
 	.stop = adreno_stop,
 	.getproperty = adreno_getproperty,
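
With the perfcounter ioctls removed, IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET
is the only device-specific ioctl left in adreno_ioctl(). A hedged userspace
sketch of that call (fd is assumed to be an open /dev/kgsl-3d0; the wrapper
name and argument values are placeholders):

	#include <sys/ioctl.h>
	#include <linux/msm_kgsl.h>

	/* Sketch: point a draw context at a new GMEM bin base offset. */
	static int set_bin_base(int fd, unsigned int ctxt_id,
				unsigned int gmem_offset)
	{
		struct kgsl_drawctxt_set_bin_base_offset binbase = {
			.drawctxt_id = ctxt_id,
			.offset = gmem_offset,
		};

		return ioctl(fd, IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET,
			     &binbase);
	}
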
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 90d6027..8d16bfa 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -39,7 +39,6 @@
 /* Command identifiers */
 #define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
 #define KGSL_CMD_IDENTIFIER		0x2EEDFACE
-#define KGSL_CMD_INTERNAL_IDENTIFIER	0x2EEDD00D
 #define KGSL_START_OF_IB_IDENTIFIER	0x2EADEABE
 #define KGSL_END_OF_IB_IDENTIFIER	0x2ABEDEAD
 #define KGSL_END_OF_FRAME_IDENTIFIER	0x2E0F2E0F
@@ -73,10 +72,8 @@
 	ADRENO_REV_A220 = 220,
 	ADRENO_REV_A225 = 225,
 	ADRENO_REV_A305 = 305,
-	ADRENO_REV_A305C = 306,
 	ADRENO_REV_A320 = 320,
 	ADRENO_REV_A330 = 330,
-	ADRENO_REV_A305B = 335,
 };
 
 struct adreno_gpudev;
@@ -106,6 +103,7 @@
 	unsigned int ib_check_level;
 	unsigned int fast_hang_detect;
 	unsigned int ft_policy;
+	unsigned int ft_user_control;
 	unsigned int long_ib_detect;
 	unsigned int long_ib;
 	unsigned int long_ib_ts;
@@ -113,45 +111,6 @@
 	unsigned int gpulist_index;
 	struct ocmem_buf *ocmem_hdl;
 	unsigned int ocmem_base;
-	unsigned int gpu_cycles;
-};
-
-#define PERFCOUNTER_FLAG_NONE 0x0
-#define PERFCOUNTER_FLAG_KERNEL 0x1
-
-/* Structs to maintain the list of active performance counters */
-
-/**
- * struct adreno_perfcount_register: register state
- * @countable: countable the register holds
- * @refcount: number of users of the register
- * @offset: register hardware offset
- */
-struct adreno_perfcount_register {
-	unsigned int countable;
-	unsigned int refcount;
-	unsigned int offset;
-	unsigned int flags;
-};
-
-/**
- * struct adreno_perfcount_group: registers for a hardware group
- * @regs: available registers for this group
- * @reg_count: total registers for this group
- */
-struct adreno_perfcount_group {
-	struct adreno_perfcount_register *regs;
-	unsigned int reg_count;
-};
-
-/**
- * adreno_perfcounts: all available perfcounter groups
- * @groups: available groups for this device
- * @group_count: total groups for this device
- */
-struct adreno_perfcounters {
-	struct adreno_perfcount_group *groups;
-	unsigned int group_count;
 };
 
 struct adreno_gpudev {
@@ -165,8 +124,6 @@
 	/* keeps track of when we need to execute the draw workaround code */
 	int ctx_switches_since_last_draw;
 
-	struct adreno_perfcounters *perfcounters;
-
 	/* GPU specific function hooks */
 	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
 	void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
@@ -177,15 +134,9 @@
 	void (*irq_control)(struct adreno_device *, int);
 	unsigned int (*irq_pending)(struct adreno_device *);
 	void * (*snapshot)(struct adreno_device *, void *, int *, int);
-	int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
-	void (*perfcounter_init)(struct adreno_device *);
+	void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
 	void (*start)(struct adreno_device *);
 	unsigned int (*busy_cycles)(struct adreno_device *);
-	void (*perfcounter_enable)(struct adreno_device *, unsigned int group,
-		unsigned int counter, unsigned int countable);
-	uint64_t (*perfcounter_read)(struct adreno_device *adreno_dev,
-		unsigned int group, unsigned int counter,
-		unsigned int offset);
 };
 
 /*
@@ -275,13 +226,7 @@
 void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
 				unsigned int value);
 
-void adreno_shadermem_regread(struct kgsl_device *device,
-						unsigned int offsetwords,
-						unsigned int *value);
-
 int adreno_dump(struct kgsl_device *device, int manual);
-unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device
-							*adreno_dev);
 
 struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
 						unsigned int pt_base,
@@ -305,13 +250,6 @@
 unsigned int adreno_ft_detect(struct kgsl_device *device,
 						unsigned int *prev_reg_val);
 
-int adreno_perfcounter_get(struct adreno_device *adreno_dev,
-	unsigned int groupid, unsigned int countable, unsigned int *offset,
-	unsigned int flags);
-
-int adreno_perfcounter_put(struct adreno_device *adreno_dev,
-	unsigned int groupid, unsigned int countable);
-
 static inline int adreno_is_a200(struct adreno_device *adreno_dev)
 {
 	return (adreno_dev->gpurev == ADRENO_REV_A200);
@@ -363,16 +301,6 @@
 	return (adreno_dev->gpurev == ADRENO_REV_A305);
 }
 
-static inline int adreno_is_a305b(struct adreno_device *adreno_dev)
-{
-	return (adreno_dev->gpurev == ADRENO_REV_A305B);
-}
-
-static inline int adreno_is_a305c(struct adreno_device *adreno_dev)
-{
-	return (adreno_dev->gpurev == ADRENO_REV_A305C);
-}
-
 static inline int adreno_is_a320(struct adreno_device *adreno_dev)
 {
 	return (adreno_dev->gpurev == ADRENO_REV_A320);
@@ -383,12 +311,6 @@
 	return (adreno_dev->gpurev == ADRENO_REV_A330);
 }
 
-static inline int adreno_is_a330v2(struct adreno_device *adreno_dev)
-{
-	return ((adreno_dev->gpurev == ADRENO_REV_A330) &&
-		(ADRENO_CHIPID_PATCH(adreno_dev->chip_id) > 0));
-}
-
 static inline int adreno_rb_ctxtswitch(unsigned int *cmd)
 {
 	return (cmd[0] == cp_nop_packet(1) &&
@@ -494,13 +416,12 @@
 	unsigned int *start = cmds;
 
 	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
-	*cmds++ = 0;
+	*cmds++ = 0x00000000;
 
 	if ((adreno_dev->gpurev == ADRENO_REV_A305) ||
-		(adreno_dev->gpurev == ADRENO_REV_A305C) ||
 		(adreno_dev->gpurev == ADRENO_REV_A320)) {
 		*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
-		*cmds++ = 0;
+		*cmds++ = 0x00000000;
 	}
 
 	return cmds - start;
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index dd9bdc3..aa14330 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1515,26 +1515,18 @@
 			"Current active context has caused gpu hang\n");
 
 	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-		kgsl_cffdump_syncmem(NULL, &context->gpustate,
-			context->reg_save[1],
-			context->reg_save[2] << 2, true);
+
 		/* save registers and constants. */
 		adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->reg_save, 3);
 
 		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
-			kgsl_cffdump_syncmem(NULL, &context->gpustate,
-				context->shader_save[1],
-				context->shader_save[2] << 2, true);
 			/* save shader partitioning and instructions. */
 			adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_PMODE,
 				context->shader_save, 3);
 
-			kgsl_cffdump_syncmem(NULL, &context->gpustate,
-				context->shader_fixup[1],
-				context->shader_fixup[2] << 2, true);
 			/*
 			 * fixup shader partitioning parameter for
 			 *  SET_SHADER_BASES.
@@ -1549,9 +1541,6 @@
 
 	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
 	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
-		kgsl_cffdump_syncmem(NULL, &context->gpustate,
-			context->context_gmem_shadow.gmem_save[1],
-			context->context_gmem_shadow.gmem_save[2] << 2, true);
 		/* save gmem.
 		 * (note: changes shader. shader must already be saved.)
 		 */
@@ -1559,10 +1548,6 @@
 			KGSL_CMD_FLAGS_PMODE,
 			context->context_gmem_shadow.gmem_save, 3);
 
-		kgsl_cffdump_syncmem(NULL, &context->gpustate,
-			context->chicken_restore[1],
-			context->chicken_restore[2] << 2, true);
-
 		/* Restore TP0_CHICKEN */
 		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
 			adreno_ringbuffer_issuecmds(device, context,
@@ -1589,6 +1574,8 @@
 		return;
 	}
 
+	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
+
 	cmds[0] = cp_nop_packet(1);
 	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
 	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
@@ -1599,24 +1586,21 @@
 					cmds, 5);
 	kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);
 
+#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP
+	kgsl_cffdump_syncmem(NULL, &context->gpustate,
+		context->gpustate.gpuaddr, LCC_SHADOW_SIZE +
+		REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false);
+#endif
+
 	/* restore gmem.
 	 *  (note: changes shader. shader must not already be restored.)
 	 */
 	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
-		kgsl_cffdump_syncmem(NULL, &context->gpustate,
-			context->context_gmem_shadow.gmem_restore[1],
-			context->context_gmem_shadow.gmem_restore[2] << 2,
-			true);
-
 		adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_PMODE,
 			context->context_gmem_shadow.gmem_restore, 3);
 
 		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-			kgsl_cffdump_syncmem(NULL, &context->gpustate,
-				context->chicken_restore[1],
-				context->chicken_restore[2] << 2, true);
-
 			/* Restore TP0_CHICKEN */
 			adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
@@ -1627,9 +1611,6 @@
 	}
 
 	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-		kgsl_cffdump_syncmem(NULL, &context->gpustate,
-			context->reg_restore[1],
-			context->reg_restore[2] << 2, true);
 
 		/* restore registers and constants. */
 		adreno_ringbuffer_issuecmds(device, context,
@@ -1637,10 +1618,6 @@
 
 		/* restore shader instructions & partitioning. */
 		if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
-			kgsl_cffdump_syncmem(NULL, &context->gpustate,
-				context->shader_restore[1],
-				context->shader_restore[2] << 2, true);
-
 			adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->shader_restore, 3);
@@ -1750,8 +1727,12 @@
 	adreno_regwrite(device, REG_CP_INT_ACK, status);
 
 	if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) {
+		KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n");
 		queue_work(device->work_queue, &device->ts_expired_ws);
 		wake_up_interruptible_all(&device->wait_queue);
+		atomic_notifier_call_chain(&(device->ts_notifier_list),
+					   device->id,
+					   NULL);
 	}
 }
 
@@ -1850,16 +1831,13 @@
 		 MASTER_INT_SIGNAL__RBBM_INT_STAT)) ? 1 : 0;
 }
 
-static int a2xx_rb_init(struct adreno_device *adreno_dev,
+static void a2xx_rb_init(struct adreno_device *adreno_dev,
 			struct adreno_ringbuffer *rb)
 {
 	unsigned int *cmds, cmds_gpu;
 
 	/* ME_INIT */
 	cmds = adreno_ringbuffer_allocspace(rb, NULL, 19);
-	if (cmds == NULL)
-		return -ENOMEM;
-
 	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19);
 
 	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18));
@@ -1912,8 +1890,6 @@
 	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
 
 	adreno_ringbuffer_submit(rb);
-
-	return 0;
 }
 
 static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev)
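
The interrupt path above once again publishes ringbuffer/IB1 expiry on
device->ts_notifier_list. A sketch of how a consumer would subscribe, using
the stock kernel notifier API (the handler and wrapper names are
placeholders):

	#include <linux/notifier.h>

	static int ts_expired_notify(struct notifier_block *nb,
				     unsigned long device_id, void *unused)
	{
		/* Runs from the CP interrupt path via the atomic chain. */
		return NOTIFY_OK;
	}

	static struct notifier_block ts_nb = {
		.notifier_call = ts_expired_notify,
	};

	static void ts_notifier_subscribe(struct kgsl_device *device)
	{
		atomic_notifier_chain_register(&device->ts_notifier_list,
					       &ts_nb);
	}
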
diff --git a/drivers/gpu/msm/adreno_a2xx_snapshot.c b/drivers/gpu/msm/adreno_a2xx_snapshot.c
index 2c86f82..75795b1 100644
--- a/drivers/gpu/msm/adreno_a2xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a2xx_snapshot.c
@@ -224,31 +224,6 @@
 	return DEBUG_SECTION_SZ(MIUDEBUG_COUNT);
 }
 
-/* Snapshot the istore memory */
-static int a2xx_snapshot_istore(struct kgsl_device *device, void *snapshot,
-	int remain, void *priv)
-{
-	struct kgsl_snapshot_istore *header = snapshot;
-	unsigned int *data = snapshot + sizeof(*header);
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	int count, i;
-
-	count = adreno_dev->istore_size * adreno_dev->instruction_size;
-
-	if (remain < (count * 4) + sizeof(*header)) {
-		KGSL_DRV_ERR(device,
-			"snapshot: Not enough memory for the istore section");
-		return 0;
-	}
-
-	header->count = adreno_dev->istore_size;
-
-	for (i = 0; i < count; i++)
-		kgsl_regread(device, ADRENO_ISTORE_START + i, &data[i]);
-
-	return (count * 4) + sizeof(*header);
-}
-
 /* A2XX GPU snapshot function - this is where all of the A2XX specific
  * bits and pieces are grabbed into the snapshot memory
  */
@@ -363,18 +338,6 @@
 		}
 	}
 
-	/*
-	 * Only dump the istore on a hang - reading it on a running system
-	 * has a non zero chance of hanging the GPU.
-	 */
-
-	if (adreno_is_a2xx(adreno_dev) && hang) {
-		snapshot = kgsl_snapshot_add_section(device,
-			KGSL_SNAPSHOT_SECTION_ISTORE, snapshot, remain,
-			a2xx_snapshot_istore, NULL);
-	}
-
-
 	/* Reset the clock gating */
 	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, pmoverride);
 
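Removing a2xx_snapshot_istore() drops one kgsl_snapshot_add_section()
callback. Each such callback follows the contract visible in the deleted
code: check that remain can hold the header plus payload, fill both, and
return the bytes written (0 means "no room, skip this section"). A condensed
sketch (the function name and dword count are placeholders):

	static int snapshot_example(struct kgsl_device *device, void *snapshot,
		int remain, void *priv)
	{
		struct kgsl_snapshot_istore *header = snapshot;
		unsigned int *data = snapshot + sizeof(*header);
		int i, count = 64;	/* placeholder dword count */

		if (remain < (count * 4) + sizeof(*header))
			return 0;	/* not enough space left */

		header->count = count;
		for (i = 0; i < count; i++)
			kgsl_regread(device, ADRENO_ISTORE_START + i,
				     &data[i]);

		return (count * 4) + sizeof(*header);
	}
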
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 13c723a..3d9ec6d 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -52,8 +52,8 @@
 	0x2240, 0x227e,
 	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
 	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
-	0x22ff, 0x22ff, 0x2340, 0x2343,
-	0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
+	0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
+	0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
 	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
 	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
 	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
@@ -61,8 +61,8 @@
 	0x25f0, 0x25f0,
 	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
 	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
-	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
-	0x300C, 0x300E, 0x301C, 0x301D,
+	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
+	0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
 	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
 	0x303C, 0x303C, 0x305E, 0x305F,
 };
@@ -445,25 +445,6 @@
 	tmp_ctx.cmd = cmd;
 }
 
-unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device
-							*adreno_dev)
-{
-	if (adreno_is_a305(adreno_dev))
-		return A305_RBBM_CLOCK_CTL_DEFAULT;
-	else if (adreno_is_a305c(adreno_dev))
-		return A305C_RBBM_CLOCK_CTL_DEFAULT;
-	else if (adreno_is_a320(adreno_dev))
-		return A320_RBBM_CLOCK_CTL_DEFAULT;
-	else if (adreno_is_a330v2(adreno_dev))
-		return A330v2_RBBM_CLOCK_CTL_DEFAULT;
-	else if (adreno_is_a330(adreno_dev))
-		return A330_RBBM_CLOCK_CTL_DEFAULT;
-	else if (adreno_is_a305b(adreno_dev))
-		return A305B_RBBM_CLOCK_CTL_DEFAULT;
-
-	BUG_ON(1);
-}
-
 /* Copy GMEM contents to system memory shadow. */
 static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
 					 struct adreno_context *drawctxt,
@@ -473,7 +454,7 @@
 	unsigned int *start = cmds;
 
 	*cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
-	*cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
 
 	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
 	*cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
@@ -1269,7 +1250,7 @@
 	unsigned int *start = cmds;
 
 	*cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
-	*cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
 
 	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
 	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
@@ -2419,11 +2400,6 @@
 		 * already be saved.)
 		 */
 
-		kgsl_cffdump_syncmem(NULL,
-			&context->gpustate,
-			context->context_gmem_shadow.gmem_save[1],
-			context->context_gmem_shadow.gmem_save[2] << 2, true);
-
 		adreno_ringbuffer_issuecmds(device, context,
 					KGSL_CMD_FLAGS_PMODE,
 					    context->context_gmem_shadow.
@@ -2445,6 +2421,8 @@
 		return;
 	}
 
+	KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
+
 	cmds[0] = cp_nop_packet(1);
 	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
 	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
@@ -2461,12 +2439,6 @@
 	 */
 
 	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
-		kgsl_cffdump_syncmem(NULL,
-			&context->gpustate,
-			context->context_gmem_shadow.gmem_restore[1],
-			context->context_gmem_shadow.gmem_restore[2] << 2,
-			true);
-
 		adreno_ringbuffer_issuecmds(device, context,
 					KGSL_CMD_FLAGS_PMODE,
 					    context->context_gmem_shadow.
@@ -2499,14 +2471,11 @@
 	}
 }
 
-static int a3xx_rb_init(struct adreno_device *adreno_dev,
+static void a3xx_rb_init(struct adreno_device *adreno_dev,
 			 struct adreno_ringbuffer *rb)
 {
 	unsigned int *cmds, cmds_gpu;
 	cmds = adreno_ringbuffer_allocspace(rb, NULL, 18);
-	if (cmds == NULL)
-		return -ENOMEM;
-
 	cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
 
 	GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
@@ -2530,8 +2499,6 @@
 	GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
 
 	adreno_ringbuffer_submit(rb);
-
-	return 0;
 }
 
 static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
@@ -2614,213 +2581,9 @@
 
 	/* Schedule work to free mem and issue ibs */
 	queue_work(device->work_queue, &device->ts_expired_ws);
-}
 
-/**
- * struct a3xx_perfcounter_register - Define a performance counter register
- * @load_bit: the bit to set in RBBM_LOAD_CMD0/RBBM_LOAD_CMD1 to force the RBBM
- * to load the reset value into the appropriate counter
- * @select: The dword offset of the register to write the selected
- * countable into
- */
-
-struct a3xx_perfcounter_register {
-	unsigned int load_bit;
-	unsigned int select;
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_cp[] = {
-	{ 0, A3XX_CP_PERFCOUNTER_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rbbm[] = {
-	{ 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
-	{ 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_pc[] = {
-	{ 3, A3XX_PC_PERFCOUNTER0_SELECT },
-	{ 4, A3XX_PC_PERFCOUNTER1_SELECT },
-	{ 5, A3XX_PC_PERFCOUNTER2_SELECT },
-	{ 6, A3XX_PC_PERFCOUNTER3_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vfd[] = {
-	{ 7, A3XX_VFD_PERFCOUNTER0_SELECT },
-	{ 8, A3XX_VFD_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_hlsq[] = {
-	{ 9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
-	{ 10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
-	{ 11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
-	{ 12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
-	{ 13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
-	{ 14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vpc[] = {
-	{ 15, A3XX_VPC_PERFCOUNTER0_SELECT },
-	{ 16, A3XX_VPC_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tse[] = {
-	{ 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
-	{ 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_ras[] = {
-	{ 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
-	{ 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_uche[] = {
-	{ 21, A3XX_UCHE_PERFCOUNTER0_SELECT },
-	{ 22, A3XX_UCHE_PERFCOUNTER1_SELECT },
-	{ 23, A3XX_UCHE_PERFCOUNTER2_SELECT },
-	{ 24, A3XX_UCHE_PERFCOUNTER3_SELECT },
-	{ 25, A3XX_UCHE_PERFCOUNTER4_SELECT },
-	{ 26, A3XX_UCHE_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tp[] = {
-	{ 27, A3XX_TP_PERFCOUNTER0_SELECT },
-	{ 28, A3XX_TP_PERFCOUNTER1_SELECT },
-	{ 29, A3XX_TP_PERFCOUNTER2_SELECT },
-	{ 30, A3XX_TP_PERFCOUNTER3_SELECT },
-	{ 31, A3XX_TP_PERFCOUNTER4_SELECT },
-	{ 32, A3XX_TP_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_sp[] = {
-	{ 33, A3XX_SP_PERFCOUNTER0_SELECT },
-	{ 34, A3XX_SP_PERFCOUNTER1_SELECT },
-	{ 35, A3XX_SP_PERFCOUNTER2_SELECT },
-	{ 36, A3XX_SP_PERFCOUNTER3_SELECT },
-	{ 37, A3XX_SP_PERFCOUNTER4_SELECT },
-	{ 38, A3XX_SP_PERFCOUNTER5_SELECT },
-	{ 39, A3XX_SP_PERFCOUNTER6_SELECT },
-	{ 40, A3XX_SP_PERFCOUNTER7_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rb[] = {
-	{ 41, A3XX_RB_PERFCOUNTER0_SELECT },
-	{ 42, A3XX_RB_PERFCOUNTER1_SELECT },
-};
-
-#define REGCOUNTER_GROUP(_x) { (_x), ARRAY_SIZE((_x)) }
-
-static struct {
-	struct a3xx_perfcounter_register *regs;
-	int count;
-} a3xx_perfcounter_reglist[] = {
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_cp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_rbbm),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_pc),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_vfd),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_hlsq),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_vpc),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_tse),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_ras),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_uche),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_tp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_sp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_rb),
-};
-
-static void a3xx_perfcounter_enable_pwr(struct kgsl_device *device,
-	unsigned int countable)
-{
-	unsigned int in, out;
-
-	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &in);
-
-	if (countable == 0)
-		out = in | RBBM_RBBM_CTL_RESET_PWR_CTR0;
-	else
-		out = in | RBBM_RBBM_CTL_RESET_PWR_CTR1;
-
-	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
-
-	if (countable == 0)
-		out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR0;
-	else
-		out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
-
-	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
-
-	return;
-}
-
-/*
- * a3xx_perfcounter_enable - Configure a performance counter for a countable
- * @adreno_dev -  Adreno device to configure
- * @group - Desired performance counter group
- * @counter - Desired performance counter in the group
- * @countable - Desired countable
- *
- * Physically set up a counter within a group with the desired countable
- */
-
-static void a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
-	unsigned int group, unsigned int counter, unsigned int countable)
-{
-	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int val = 0;
-	struct a3xx_perfcounter_register *reg;
-
-	if (group > ARRAY_SIZE(a3xx_perfcounter_reglist))
-		return;
-
-	if (counter > a3xx_perfcounter_reglist[group].count)
-		return;
-
-	/* Special case - power */
-	if (group == KGSL_PERFCOUNTER_GROUP_PWR)
-		return a3xx_perfcounter_enable_pwr(device, countable);
-
-	reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
-
-	/* Select the desired perfcounter */
-	adreno_regwrite(device, reg->select, countable);
-
-	if (reg->load_bit < 32) {
-		val = 1 << reg->load_bit;
-		adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val);
-	} else {
-		val  = 1 << (reg->load_bit - 32);
-		adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val);
-	}
-}
-
-static uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
-	unsigned int group, unsigned int counter,
-	unsigned int offset)
-{
-	struct kgsl_device *device = &adreno_dev->dev;
-	struct a3xx_perfcounter_register *reg = NULL;
-	unsigned int lo = 0, hi = 0;
-	unsigned int val;
-
-	if (group > ARRAY_SIZE(a3xx_perfcounter_reglist))
-		return 0;
-
-	reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
-
-	/* Freeze the counter */
-	adreno_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
-	val &= ~reg->load_bit;
-	adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
-
-	/* Read the values */
-	adreno_regread(device, offset, &lo);
-	adreno_regread(device, offset + 1, &hi);
-
-	/* Re-Enable the counter */
-	val |= reg->load_bit;
-	adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
-
-	return (((uint64_t) hi) << 32) | lo;
+	atomic_notifier_call_chain(&device->ts_notifier_list,
+				   device->id, NULL);
 }
 
 #define A3XX_IRQ_CALLBACK(_c) { .func = _c }
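
/*
 * A sketch, not part of the patch: the ts_expired path above now fans out
 * through an atomic notifier chain.  A consumer would hook it roughly as
 * follows; the callback name and registration site are assumptions.
 */
#include <linux/notifier.h>

static int example_ts_expired(struct notifier_block *nb,
			      unsigned long device_id, void *unused)
{
	/* device_id is the kgsl device id passed by the call chain above */
	return NOTIFY_OK;
}

static struct notifier_block example_ts_nb = {
	.notifier_call = example_ts_expired,
};

/* From a client's init path:
 *	atomic_notifier_chain_register(&device->ts_notifier_list,
 *				       &example_ts_nb);
 */
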
@@ -2924,22 +2687,26 @@
 static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int val;
-	unsigned int ret = 0;
+	unsigned int reg, val;
+
+	/* Freeze the counter */
+	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
+	reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
+	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
 
 	/* Read the value */
 	adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
 
-	/* Return 0 for the first read */
-	if (adreno_dev->gpu_cycles != 0) {
-		if (val < adreno_dev->gpu_cycles)
-			ret = (0xFFFFFFFF - adreno_dev->gpu_cycles) + val;
-		else
-			ret = val - adreno_dev->gpu_cycles;
-	}
+	/* Reset the counter */
+	reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
+	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
 
-	adreno_dev->gpu_cycles = val;
-	return ret;
+	/* Re-enable the counter */
+	reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
+	reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
+	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
+
+	return val;
 }
 
 struct a3xx_vbif_data {
@@ -2967,29 +2734,6 @@
 	{0, 0},
 };
 
-static struct a3xx_vbif_data a305b_vbif[] = {
-	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818 },
-	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818 },
-	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018 },
-	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018 },
-	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303 },
-	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
-	{0, 0},
-};
-
-static struct a3xx_vbif_data a305c_vbif[] = {
-	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x00101010 },
-	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x00101010 },
-	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 },
-	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 },
-	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000101 },
-	{ A3XX_VBIF_ARB_CTL, 0x00000010 },
-	/* Set up AOOO */
-	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x00000007 },
-	{ A3XX_VBIF_OUT_AXI_AOOO, 0x00070007 },
-	{0, 0},
-};
-
 static struct a3xx_vbif_data a320_vbif[] = {
 	/* Set up 16 deep read/write request queues */
 	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
@@ -3040,81 +2784,17 @@
 	{0, 0},
 };
 
-/*
- * Most of the VBIF registers on 8974v2 have the correct values at power on, so
- * we won't modify those if we don't need to
- */
-static struct a3xx_vbif_data a330v2_vbif[] = {
-	/* Enable 1k sort */
-	{ A3XX_VBIF_ABIT_SORT, 0x0001003F },
-	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
-	/* Enable WR-REQ */
-	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
-	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
-	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
-	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
-	{0, 0},
-};
-
-static struct {
-	int(*devfunc)(struct adreno_device *);
-	struct a3xx_vbif_data *vbif;
-} a3xx_vbif_platforms[] = {
-	{ adreno_is_a305, a305_vbif },
-	{ adreno_is_a305c, a305c_vbif },
-	{ adreno_is_a320, a320_vbif },
-	/* A330v2 needs to be ahead of A330 so the right device matches */
-	{ adreno_is_a330v2, a330v2_vbif },
-	{ adreno_is_a330, a330_vbif },
-	{ adreno_is_a305b, a305b_vbif },
-};
-
-static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
-{
-	/*
-	 * Set SP to count SP_ALU_ACTIVE_CYCLES, it includes
-	 * all ALU instruction execution regardless precision or shader ID.
-	 * Set SP to count SP0_ICL1_MISSES, It counts
-	 * USP L1 instruction miss request.
-	 * Set SP to count SP_FS_FULL_ALU_INSTRUCTIONS, it
-	 * counts USP flow control instruction execution.
-	 * we will use this to augment our hang detection
-	 */
-	if (adreno_dev->fast_hang_detect) {
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
-			SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6],
-			PERFCOUNTER_FLAG_KERNEL);
-		ft_detect_regs[7] = ft_detect_regs[6] + 1;
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
-			SP0_ICL1_MISSES, &ft_detect_regs[8],
-			PERFCOUNTER_FLAG_KERNEL);
-		ft_detect_regs[9] = ft_detect_regs[8] + 1;
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
-			SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10],
-			PERFCOUNTER_FLAG_KERNEL);
-		ft_detect_regs[11] = ft_detect_regs[10] + 1;
-	}
-
-	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
-		SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL);
-
-	/* Reserve and start countable 1 in the PWR perfcounter group */
-	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
-			NULL, PERFCOUNTER_FLAG_KERNEL);
-}
-
 static void a3xx_start(struct adreno_device *adreno_dev)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
 	struct a3xx_vbif_data *vbif = NULL;
-	int i;
 
-	for (i = 0; i < ARRAY_SIZE(a3xx_vbif_platforms); i++) {
-		if (a3xx_vbif_platforms[i].devfunc(adreno_dev)) {
-			vbif = a3xx_vbif_platforms[i].vbif;
-			break;
-		}
-	}
+	if (adreno_is_a305(adreno_dev))
+		vbif = a305_vbif;
+	else if (adreno_is_a320(adreno_dev))
+		vbif = a320_vbif;
+	else if (adreno_is_a330(adreno_dev))
+		vbif = a330_vbif;
 
 	BUG_ON(vbif == NULL);
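
/*
 * Sketch of the table walk that follows the BUG_ON above (the loop body
 * falls outside this hunk): each a3xx_vbif_data list ends in a {0, 0}
 * sentinel, so programming it reduces to a loop like the one below.  The
 * field names reg/val are assumed from the initializers above.
 */
	while (vbif->reg != 0) {
		adreno_regwrite(device, vbif->reg, vbif->val);
		vbif++;
	}
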
 
@@ -3152,18 +2832,10 @@
 
 	/* Enable Clock gating */
 	adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
-		adreno_a3xx_rbbm_clock_ctl_default(adreno_dev));
-
-	if (adreno_is_a330v2(adreno_dev))
-		adreno_regwrite(device, A3XX_RBBM_GPR0_CTL,
-			A330v2_RBBM_GPR0_CTL_DEFAULT);
-	else if (adreno_is_a330(adreno_dev))
-		adreno_regwrite(device, A3XX_RBBM_GPR0_CTL,
-			A330_RBBM_GPR0_CTL_DEFAULT);
+			A3XX_RBBM_CLOCK_CTL_DEFAULT);
 
 	/* Set the OCMEM base address for A330 */
-	if (adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)) {
+	if (adreno_is_a330(adreno_dev)) {
 		adreno_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
 			(unsigned int)(adreno_dev->ocmem_base >> 14));
 	}
@@ -3171,121 +2843,25 @@
 	/* Turn on performance counters */
 	adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
 
-	/* Turn on the GPU busy counter and let it run free */
-
-	adreno_dev->gpu_cycles = 0;
+	/*
+	 * Set SP perfcounter 5 to count SP_ALU_ACTIVE_CYCLES; it includes
+	 * all ALU instruction execution regardless of precision or shader ID.
+	 * Set SP perfcounter 6 to count SP0_ICL1_MISSES; it counts
+	 * USP L1 instruction miss requests.
+	 * Set SP perfcounter 7 to count SP_FS_CFLOW_INSTRUCTIONS; it
+	 * counts USP flow control instruction execution.
+	 * We will use these to augment our hang detection.
+	 */
+	if (adreno_dev->fast_hang_detect) {
+		adreno_regwrite(device, A3XX_SP_PERFCOUNTER5_SELECT,
+			SP_ALU_ACTIVE_CYCLES);
+		adreno_regwrite(device, A3XX_SP_PERFCOUNTER6_SELECT,
+			SP0_ICL1_MISSES);
+		adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
+			SP_FS_CFLOW_INSTRUCTIONS);
+	}
 }
 
-/*
- * Define the available perfcounter groups - these get used by
- * adreno_perfcounter_get and adreno_perfcounter_put
- */
-
-static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_CP_0_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_1_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_2_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_3_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_1_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_2_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_3_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_4_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_5_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_1_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_2_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_3_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_4_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_5_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_6_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_7_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_1_LO, 0 },
-};
-
-static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, 0 },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, 0 },
-};
-
-static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
-	{ a3xx_perfcounters_cp, ARRAY_SIZE(a3xx_perfcounters_cp) },
-	{ a3xx_perfcounters_rbbm, ARRAY_SIZE(a3xx_perfcounters_rbbm) },
-	{ a3xx_perfcounters_pc, ARRAY_SIZE(a3xx_perfcounters_pc) },
-	{ a3xx_perfcounters_vfd, ARRAY_SIZE(a3xx_perfcounters_vfd) },
-	{ a3xx_perfcounters_hlsq, ARRAY_SIZE(a3xx_perfcounters_hlsq) },
-	{ a3xx_perfcounters_vpc, ARRAY_SIZE(a3xx_perfcounters_vpc) },
-	{ a3xx_perfcounters_tse, ARRAY_SIZE(a3xx_perfcounters_tse) },
-	{ a3xx_perfcounters_ras, ARRAY_SIZE(a3xx_perfcounters_ras) },
-	{ a3xx_perfcounters_uche, ARRAY_SIZE(a3xx_perfcounters_uche) },
-	{ a3xx_perfcounters_tp, ARRAY_SIZE(a3xx_perfcounters_tp) },
-	{ a3xx_perfcounters_sp, ARRAY_SIZE(a3xx_perfcounters_sp) },
-	{ a3xx_perfcounters_rb, ARRAY_SIZE(a3xx_perfcounters_rb) },
-	{ a3xx_perfcounters_pwr, ARRAY_SIZE(a3xx_perfcounters_pwr) },
-};
-
-static struct adreno_perfcounters a3xx_perfcounters = {
-	a3xx_perfcounter_groups,
-	ARRAY_SIZE(a3xx_perfcounter_groups),
-};
-
 /* Defined in adreno_a3xx_snapshot.c */
 void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
 	int *remain, int hang);
@@ -3294,20 +2870,16 @@
 	.reg_rbbm_status = A3XX_RBBM_STATUS,
 	.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
 	.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
-	.perfcounters = &a3xx_perfcounters,
 
 	.ctxt_create = a3xx_drawctxt_create,
 	.ctxt_save = a3xx_drawctxt_save,
 	.ctxt_restore = a3xx_drawctxt_restore,
 	.ctxt_draw_workaround = NULL,
 	.rb_init = a3xx_rb_init,
-	.perfcounter_init = a3xx_perfcounter_init,
 	.irq_control = a3xx_irq_control,
 	.irq_handler = a3xx_irq_handler,
 	.irq_pending = a3xx_irq_pending,
 	.busy_cycles = a3xx_busy_cycles,
 	.start = a3xx_start,
 	.snapshot = a3xx_snapshot,
-	.perfcounter_enable = a3xx_perfcounter_enable,
-	.perfcounter_read = a3xx_perfcounter_read,
 };
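
/*
 * A sketch, assuming a caller such as the idle-statistics path: with the
 * change above, a3xx_busy_cycles() resets PWR counter 1 on every read, so
 * each call returns the busy cycles accumulated since the previous call
 * and no caller-side delta or wraparound handling is needed.
 */
static unsigned int example_busy_percent(struct adreno_device *adreno_dev,
					 unsigned int total_cycles)
{
	unsigned int busy = a3xx_busy_cycles(adreno_dev);

	if (total_cycles == 0)
		return 0;

	return (unsigned int)(((uint64_t)busy * 100) / total_cycles);
}
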
diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c
index 58e3126..d9d5ec8 100644
--- a/drivers/gpu/msm/adreno_a3xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -11,7 +11,6 @@
  *
  */
 
-#include <linux/io.h>
 #include "kgsl.h"
 #include "adreno.h"
 #include "kgsl_snapshot.h"
@@ -20,43 +19,14 @@
 #define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
 		+ sizeof(struct kgsl_snapshot_debug))
 
-/* Shader memory size in words */
 #define SHADER_MEMORY_SIZE 0x4000
 
-/**
- * _rbbm_debug_bus_read - Helper function to read data from the RBBM
- * debug bus.
- * @device - GPU device to read/write registers
- * @block_id - Debug bus block to read from
- * @index - Index in the debug bus block to read
- * @ret - Value of the register read
- */
-static void _rbbm_debug_bus_read(struct kgsl_device *device,
-	unsigned int block_id, unsigned int index, unsigned int *val)
-{
-	unsigned int block = (block_id << 8) | 1 << 16;
-	adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index);
-	adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
-}
-
-/**
- * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader
- * memory to the snapshot buffer.
- * @device - GPU device whose shader memory is to be dumped
- * @snapshot - Pointer to binary snapshot data blob being made
- * @remain - Number of remaining bytes in the snapshot blob
- * @priv - Unused parameter
- */
 static int a3xx_snapshot_shader_memory(struct kgsl_device *device,
 	void *snapshot, int remain, void *priv)
 {
 	struct kgsl_snapshot_debug *header = snapshot;
-	unsigned int i;
 	unsigned int *data = snapshot + sizeof(*header);
-	unsigned int shader_read_len = SHADER_MEMORY_SIZE;
-
-	if (SHADER_MEMORY_SIZE > (device->shader_mem_len >> 2))
-		shader_read_len = (device->shader_mem_len >> 2);
+	int i;
 
 	if (remain < DEBUG_SECTION_SZ(SHADER_MEMORY_SIZE)) {
 		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
@@ -66,22 +36,8 @@
 	header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
 	header->size = SHADER_MEMORY_SIZE;
 
-	/* Map shader memory to kernel, for dumping */
-	if (device->shader_mem_virt == NULL)
-		device->shader_mem_virt = devm_ioremap(device->dev,
-					device->shader_mem_phys,
-					device->shader_mem_len);
-
-	if (device->shader_mem_virt == NULL) {
-		KGSL_DRV_ERR(device,
-		"Unable to map shader memory region\n");
-		return 0;
-	}
-
-	/* Now, dump shader memory to snapshot */
-	for (i = 0; i < shader_read_len; i++)
-		adreno_shadermem_regread(device, i, &data[i]);
-
+	for (i = 0; i < SHADER_MEMORY_SIZE; i++)
+		adreno_regread(device, 0x4000 + i, &data[i]);
 
 	return DEBUG_SECTION_SZ(SHADER_MEMORY_SIZE);
 }
@@ -214,8 +170,7 @@
 	int i, size;
 
 	/* The size of the ROQ buffer is core dependent */
-	size = (adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)) ?
+	size = adreno_is_a330(adreno_dev) ?
 		A330_CP_ROQ_SIZE : A320_CP_ROQ_SIZE;
 
 	if (remain < DEBUG_SECTION_SZ(size)) {
@@ -265,77 +220,66 @@
 	return DEBUG_SECTION_SZ(size);
 }
 
-struct debugbus_block {
-	unsigned int block_id;
-	unsigned int dwords;
-};
+#define DEBUGFS_BLOCK_SIZE 0x40
 
 static int a3xx_snapshot_debugbus_block(struct kgsl_device *device,
 	void *snapshot, int remain, void *priv)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
 	struct kgsl_snapshot_debugbus *header = snapshot;
-	struct debugbus_block *block = priv;
+	unsigned int id = (unsigned int) priv;
+	unsigned int val;
 	int i;
 	unsigned int *data = snapshot + sizeof(*header);
-	unsigned int dwords;
-	int size;
-
-	/*
-	 * For A305 and A320 all debug bus regions are the same size (0x40). For
-	 * A330, they can be different sizes - most are still 0x40, but some
-	 * like CP are larger
-	 */
-
-	dwords = (adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)) ?
-		block->dwords : 0x40;
-
-	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+	int size =
+		(DEBUGFS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header);
 
 	if (remain < size) {
 		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
 		return 0;
 	}
 
-	header->id = block->block_id;
-	header->count = dwords;
+	val = (id << 8) | (1 << 16);
 
-	for (i = 0; i < dwords; i++)
-		_rbbm_debug_bus_read(device, block->block_id, i, &data[i]);
+	header->id = id;
+	header->count = DEBUGFS_BLOCK_SIZE;
+
+	for (i = 0; i < DEBUGFS_BLOCK_SIZE; i++) {
+		adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, val | i);
+		adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS,
+			&data[i]);
+	}
 
 	return size;
 }
 
-static struct debugbus_block debugbus_blocks[] = {
-	{ RBBM_BLOCK_ID_CP, 0x52, },
-	{ RBBM_BLOCK_ID_RBBM, 0x40, },
-	{ RBBM_BLOCK_ID_VBIF, 0x40, },
-	{ RBBM_BLOCK_ID_HLSQ, 0x40, },
-	{ RBBM_BLOCK_ID_UCHE, 0x40, },
-	{ RBBM_BLOCK_ID_PC, 0x40, },
-	{ RBBM_BLOCK_ID_VFD, 0x40, },
-	{ RBBM_BLOCK_ID_VPC, 0x40, },
-	{ RBBM_BLOCK_ID_TSE, 0x40, },
-	{ RBBM_BLOCK_ID_RAS, 0x40, },
-	{ RBBM_BLOCK_ID_VSC, 0x40, },
-	{ RBBM_BLOCK_ID_SP_0, 0x40, },
-	{ RBBM_BLOCK_ID_SP_1, 0x40, },
-	{ RBBM_BLOCK_ID_SP_2, 0x40, },
-	{ RBBM_BLOCK_ID_SP_3, 0x40, },
-	{ RBBM_BLOCK_ID_TPL1_0, 0x40, },
-	{ RBBM_BLOCK_ID_TPL1_1, 0x40, },
-	{ RBBM_BLOCK_ID_TPL1_2, 0x40, },
-	{ RBBM_BLOCK_ID_TPL1_3, 0x40, },
-	{ RBBM_BLOCK_ID_RB_0, 0x40, },
-	{ RBBM_BLOCK_ID_RB_1, 0x40, },
-	{ RBBM_BLOCK_ID_RB_2, 0x40, },
-	{ RBBM_BLOCK_ID_RB_3, 0x40, },
-	{ RBBM_BLOCK_ID_MARB_0, 0x40, },
-	{ RBBM_BLOCK_ID_MARB_1, 0x40, },
-	{ RBBM_BLOCK_ID_MARB_2, 0x40, },
-	{ RBBM_BLOCK_ID_MARB_3, 0x40, },
+static unsigned int debugbus_blocks[] = {
+	RBBM_BLOCK_ID_CP,
+	RBBM_BLOCK_ID_RBBM,
+	RBBM_BLOCK_ID_VBIF,
+	RBBM_BLOCK_ID_HLSQ,
+	RBBM_BLOCK_ID_UCHE,
+	RBBM_BLOCK_ID_PC,
+	RBBM_BLOCK_ID_VFD,
+	RBBM_BLOCK_ID_VPC,
+	RBBM_BLOCK_ID_TSE,
+	RBBM_BLOCK_ID_RAS,
+	RBBM_BLOCK_ID_VSC,
+	RBBM_BLOCK_ID_SP_0,
+	RBBM_BLOCK_ID_SP_1,
+	RBBM_BLOCK_ID_SP_2,
+	RBBM_BLOCK_ID_SP_3,
+	RBBM_BLOCK_ID_TPL1_0,
+	RBBM_BLOCK_ID_TPL1_1,
+	RBBM_BLOCK_ID_TPL1_2,
+	RBBM_BLOCK_ID_TPL1_3,
+	RBBM_BLOCK_ID_RB_0,
+	RBBM_BLOCK_ID_RB_1,
+	RBBM_BLOCK_ID_RB_2,
+	RBBM_BLOCK_ID_RB_3,
+	RBBM_BLOCK_ID_MARB_0,
+	RBBM_BLOCK_ID_MARB_1,
+	RBBM_BLOCK_ID_MARB_2,
+	RBBM_BLOCK_ID_MARB_3,
 };
 
 static void *a3xx_snapshot_debugbus(struct kgsl_device *device,
@@ -347,7 +291,7 @@
 		snapshot = kgsl_snapshot_add_section(device,
 			KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, remain,
 			a3xx_snapshot_debugbus_block,
-			(void *) &debugbus_blocks[i]);
+			(void *) debugbus_blocks[i]);
 	}
 
 	return snapshot;
@@ -365,58 +309,18 @@
 	struct kgsl_snapshot_registers_list *list,
 	struct adreno_device *adreno_dev)
 {
-	struct kgsl_device *device = &adreno_dev->dev;
-
+	/* HLSQ specific registers */
 	/*
-	 * Trying to read HLSQ registers when the HLSQ block is busy
-	 * will cause the device to hang.  The RBBM_DEBUG_BUS has information
-	 * that will tell us if the HLSQ block is busy or not.  Read values
-	 * from the debug bus to ensure the HLSQ block is not busy (this
-	 * is hardware dependent).  If the HLSQ block is busy do not
-	 * dump the registers, otherwise dump the HLSQ registers.
+	 * Don't dump any a3xx HLSQ registers just yet.  Reading the HLSQ
+	 * registers can cause the device to hang if the HLSQ block is
+	 * busy.  Add specific checks for each a3xx core as the requirements
+	 * are discovered.  Disable by default for now.
 	 */
-
-	if (adreno_is_a330(adreno_dev)) {
-		/*
-		 * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27]
-		 *
-		 * if (!stall_context_full)
-		 * then dump HLSQ registers
-		 */
-		unsigned int stall_context_full = 0;
-
-		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49,
-				&stall_context_full);
-		stall_context_full &= 0x08000000;
-
-		if (stall_context_full)
-			return;
-	} else {
-		/*
-		 * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0]
-		 * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0]
-		 *
-		 * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10))
-		 * then dump HLSQ registers
-		 */
-		unsigned int next_pif = 0;
-
-		/* check tpif */
-		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif);
-		next_pif &= 0x1f;
-		if (next_pif != 0 && next_pif != 1 && next_pif != 28)
-			return;
-
-		/* check spif */
-		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif);
-		next_pif &= 0x3f;
-		if (next_pif != 0 && next_pif != 1 && next_pif != 10)
-			return;
+	if (!adreno_is_a3xx(adreno_dev)) {
+		regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers;
+		regs[list->count].count = a3xx_hlsq_registers_count;
+		list->count++;
 	}
-
-	regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers;
-	regs[list->count].count = a3xx_hlsq_registers_count;
-	list->count++;
 }
 
 static void _snapshot_a330_regs(struct kgsl_snapshot_registers *regs,
@@ -438,7 +342,6 @@
 	struct kgsl_device *device = &adreno_dev->dev;
 	struct kgsl_snapshot_registers_list list;
 	struct kgsl_snapshot_registers regs[5];
-	int size;
 
 	list.registers = regs;
 	list.count = 0;
@@ -449,7 +352,7 @@
 	/* Store relevant registers in list to snapshot */
 	_snapshot_a3xx_regs(regs, &list);
 	_snapshot_hlsq_regs(regs, &list, adreno_dev);
-	if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
+	if (adreno_is_a330(adreno_dev))
 		_snapshot_a330_regs(regs, &list);
 
 	/* Master set of (non debug) registers */
@@ -457,15 +360,10 @@
 		KGSL_SNAPSHOT_SECTION_REGS, snapshot, remain,
 		kgsl_snapshot_dump_regs, &list);
 
-	/*
-	 * CP_STATE_DEBUG indexed registers - 20 on 305 and 320 and 46 on A330
-	 */
-	size = (adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)) ? 0x2E : 0x14;
-
+	/* CP_STATE_DEBUG indexed registers */
 	snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
 			remain, REG_CP_STATE_DEBUG_INDEX,
-			REG_CP_STATE_DEBUG_DATA, 0x0, size);
+			REG_CP_STATE_DEBUG_DATA, 0x0, 0x14);
 
 	/* CP_ME indexed registers */
 	snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
@@ -506,8 +404,7 @@
 			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
 			a3xx_snapshot_cp_roq, NULL);
 
-	if (adreno_is_a330(adreno_dev) ||
-		adreno_is_a305b(adreno_dev)) {
+	if (adreno_is_a330(adreno_dev)) {
 		snapshot = kgsl_snapshot_add_section(device,
 			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
 			a330_snapshot_cp_merciu, NULL);
@@ -517,7 +414,7 @@
 
 	/* Enable Clock gating */
 	adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
-		adreno_a3xx_rbbm_clock_ctl_default(adreno_dev));
+			A3XX_RBBM_CLOCK_CTL_DEFAULT);
 
 	return snapshot;
 }
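
/*
 * Sketch: the debug-bus protocol now open-coded in
 * a3xx_snapshot_debugbus_block() (and previously wrapped by the removed
 * _rbbm_debug_bus_read helper) is: bit 16 enables the bus, bits 15:8
 * select the block, and the low bits index within the block.
 */
static void example_debug_bus_read(struct kgsl_device *device,
	unsigned int block_id, unsigned int index, unsigned int *val)
{
	unsigned int ctl = (block_id << 8) | (1 << 16) | index;

	adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, ctl);
	adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
}
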
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index ef599e9..890c8a1 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -64,6 +64,11 @@
 	adreno_dev->fast_hang_detect = 1;
 	debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs,
 			   &adreno_dev->fast_hang_detect);
+
+	/* Top level switch to enable/disable userspace FT control */
+	adreno_dev->ft_user_control = 0;
+	debugfs_create_u32("ft_user_control", 0644, device->d_debugfs,
+			   &adreno_dev->ft_user_control);
 	/*
 	 * FT policy can be set to any of the options below.
 	 * KGSL_FT_DISABLE -> BIT(0) Set to disable FT
@@ -75,6 +80,7 @@
 	adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
 	debugfs_create_u32("ft_policy", 0644, device->d_debugfs,
 			   &adreno_dev->ft_policy);
+
 	/* By default enable long IB detection */
 	adreno_dev->long_ib_detect = 1;
 	debugfs_create_u32("long_ib_detect", 0644, device->d_debugfs,
@@ -90,7 +96,7 @@
 	 * KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT -> BIT(3) Set to log only one
 	 * pagefault per INT.
 	 */
-	 adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
-	 debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
-			&adreno_dev->ft_pf_policy);
+	adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
+	debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
+			   &adreno_dev->ft_pf_policy);
 }
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index 176717d..6fbcdee 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -12,7 +12,6 @@
  */
 
 #include <linux/slab.h>
-#include <linux/msm_kgsl.h>
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
@@ -144,7 +143,7 @@
  */
 int adreno_drawctxt_create(struct kgsl_device *device,
 			struct kgsl_pagetable *pagetable,
-			struct kgsl_context *context, uint32_t *flags)
+			struct kgsl_context *context, uint32_t flags)
 {
 	struct adreno_context *drawctxt;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -163,36 +162,26 @@
 	drawctxt->id = context->id;
 	rb->timestamp[context->id] = 0;
 
-	*flags &= (KGSL_CONTEXT_PREAMBLE |
-		KGSL_CONTEXT_NO_GMEM_ALLOC |
-		KGSL_CONTEXT_PER_CONTEXT_TS |
-		KGSL_CONTEXT_USER_GENERATED_TS |
-		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
-		KGSL_CONTEXT_TYPE_MASK);
-
-	if (*flags & KGSL_CONTEXT_PREAMBLE)
+	if (flags & KGSL_CONTEXT_PREAMBLE)
 		drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
 
-	if (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
+	if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
 		drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
 
-	if (*flags & KGSL_CONTEXT_PER_CONTEXT_TS)
+	if (flags & KGSL_CONTEXT_PER_CONTEXT_TS)
 		drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
 
-	if (*flags & KGSL_CONTEXT_USER_GENERATED_TS) {
-		if (!(*flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
+	if (flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		if (!(flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
 			ret = -EINVAL;
 			goto err;
 		}
 		drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
 	}
 
-	if (*flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+	if (flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
 		drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE;
 
-	drawctxt->type =
-		(*flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT;
-
 	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
 	if (ret)
 		goto err;
@@ -253,6 +242,10 @@
 	if (device->state != KGSL_STATE_HUNG)
 		adreno_idle(device);
 
+	if (adreno_is_a20x(adreno_dev) && adreno_dev->drawctxt_active)
+		kgsl_setstate(&device->mmu, adreno_dev->drawctxt_active->id,
+			KGSL_MMUFLAGS_PTUPDATE);
+
 	kgsl_sharedmem_free(&drawctxt->gpustate);
 	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
 
@@ -313,10 +306,8 @@
 		return;
 	}
 
-	KGSL_CTXT_INFO(device, "from %d to %d flags %d\n",
-		adreno_dev->drawctxt_active ?
-		adreno_dev->drawctxt_active->id : 0,
-		drawctxt ? drawctxt->id : 0, flags);
+	KGSL_CTXT_INFO(device, "from %p to %p flags %d\n",
+			adreno_dev->drawctxt_active, drawctxt, flags);
 
 	/* Save the old context */
 	adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active);
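
/*
 * Sketch, for illustration only: the if-chain above maps user
 * KGSL_CONTEXT_* flags onto internal CTXT_FLAGS_* bits one by one; the
 * same mapping expressed as a table (not part of this patch).
 */
static const struct {
	uint32_t user_flag;
	uint32_t ctxt_flag;
} example_ctxt_flag_map[] = {
	{ KGSL_CONTEXT_PREAMBLE,           CTXT_FLAGS_PREAMBLE },
	{ KGSL_CONTEXT_NO_GMEM_ALLOC,      CTXT_FLAGS_NOGMEMALLOC },
	{ KGSL_CONTEXT_PER_CONTEXT_TS,     CTXT_FLAGS_PER_CONTEXT_TS },
	{ KGSL_CONTEXT_NO_FAULT_TOLERANCE, CTXT_FLAGS_NO_FAULT_TOLERANCE },
};

/* In adreno_drawctxt_create():
 *	for (i = 0; i < ARRAY_SIZE(example_ctxt_flag_map); i++)
 *		if (flags & example_ctxt_flag_map[i].user_flag)
 *			drawctxt->flags |= example_ctxt_flag_map[i].ctxt_flag;
 */
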
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index f0f3b6b..c91bfc0 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -57,14 +57,6 @@
 /* Context no fault tolerance */
 #define CTXT_FLAGS_NO_FAULT_TOLERANCE  BIT(16)
 
-/* Symbolic table for the adreno draw context type */
-#define ADRENO_DRAWCTXT_TYPES \
-	{ KGSL_CONTEXT_TYPE_ANY, "any" }, \
-	{ KGSL_CONTEXT_TYPE_GL, "GL" }, \
-	{ KGSL_CONTEXT_TYPE_CL, "CL" }, \
-	{ KGSL_CONTEXT_TYPE_C2D, "C2D" }, \
-	{ KGSL_CONTEXT_TYPE_RS, "RS" }
-
 struct kgsl_device;
 struct adreno_device;
 struct kgsl_device_private;
@@ -103,7 +95,6 @@
 	uint32_t flags;
 	uint32_t pagefault;
 	unsigned long pagefault_ts;
-	unsigned int type;
 	struct kgsl_pagetable *pagetable;
 	struct kgsl_memdesc gpustate;
 	unsigned int reg_restore[3];
@@ -136,7 +127,7 @@
 int adreno_drawctxt_create(struct kgsl_device *device,
 			struct kgsl_pagetable *pagetable,
 			struct kgsl_context *context,
-			uint32_t *flags);
+			uint32_t flags);
 
 void adreno_drawctxt_destroy(struct kgsl_device *device,
 			  struct kgsl_context *context);
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index f449870..a3fa312 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -143,10 +143,10 @@
 #define CP_IM_STORE            0x2c
 
 /* test 2 memory locations to dword values specified */
-#define CP_TEST_TWO_MEMS	0x71
+#define CP_TEST_TWO_MEMS    0x71
 
 /* PFP waits until the FIFO between the PFP and the ME is empty */
-#define CP_WAIT_FOR_ME		0x13
+#define CP_WAIT_FOR_ME      0x13
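
/*
 * Sketch, assumed from standard PM4 packet conventions (the real macro
 * bodies live elsewhere in this header): cp_type0_packet() and
 * cp_type3_packet(), used throughout this patch, encode packet headers
 * roughly as follows.
 */
#define EXAMPLE_CP_TYPE0_PKT	(0u << 30)
#define EXAMPLE_CP_TYPE3_PKT	(3u << 30)

#define example_type0_packet(reg, cnt) \
	(EXAMPLE_CP_TYPE0_PKT | (((cnt) - 1) << 16) | ((reg) & 0x7FFF))

#define example_type3_packet(op, cnt) \
	(EXAMPLE_CP_TYPE3_PKT | (((cnt) - 1) << 16) | (((op) & 0xFF) << 8))
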
 
 /*
  * for a20x
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 5b52fd8..5fdcf19 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -12,7 +12,6 @@
  */
 
 #include <linux/vmalloc.h>
-#include <mach/board.h>
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
@@ -51,7 +50,6 @@
 	{CP_DRAW_INDX,			"DRW_NDX_"},
 	{CP_DRAW_INDX_BIN,		"DRW_NDXB"},
 	{CP_EVENT_WRITE,		"EVENT_WT"},
-	{CP_MEM_WRITE,			"MEM_WRIT"},
 	{CP_IM_LOAD,			"IN__LOAD"},
 	{CP_IM_LOAD_IMMEDIATE,		"IM_LOADI"},
 	{CP_IM_STORE,			"IM_STORE"},
@@ -71,14 +69,6 @@
 	{CP_WAIT_FOR_IDLE,		"WAIT4IDL"},
 };
 
-static const struct pm_id_name pm3_nop_values[] = {
-	{KGSL_CONTEXT_TO_MEM_IDENTIFIER,	"CTX_SWCH"},
-	{KGSL_CMD_IDENTIFIER,			"CMD__EXT"},
-	{KGSL_CMD_INTERNAL_IDENTIFIER,		"CMD__INT"},
-	{KGSL_START_OF_IB_IDENTIFIER,		"IB_START"},
-	{KGSL_END_OF_IB_IDENTIFIER,		"IB___END"},
-};
-
 static uint32_t adreno_is_pm4_len(uint32_t word)
 {
 	if (word == INVALID_RB_CMD)
@@ -138,28 +128,6 @@
 	return "????????";
 }
 
-static bool adreno_is_pm3_nop_value(uint32_t word)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(pm3_nop_values); ++i) {
-		if (word == pm3_nop_values[i].id)
-			return 1;
-	}
-	return 0;
-}
-
-static const char *adreno_pm3_nop_name(uint32_t word)
-{
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(pm3_nop_values); ++i) {
-		if (word == pm3_nop_values[i].id)
-			return pm3_nop_values[i].name;
-	}
-	return "????????";
-}
-
 static void adreno_dump_regs(struct kgsl_device *device,
 			   const int *registers, int size)
 {
@@ -276,13 +244,8 @@
 				"%s", adreno_pm4_name(ptr4[j]));
 			*argp = -(adreno_is_pm4_len(ptr4[j])+1);
 		} else {
-			if (adreno_is_pm3_nop_value(ptr4[j]))
-				lx += scnprintf(linebuf + lx, linebuflen - lx,
-					"%s", adreno_pm3_nop_name(ptr4[j]));
-			else
-				lx += scnprintf(linebuf + lx, linebuflen - lx,
-					"%8.8X", ptr4[j]);
-
+			lx += scnprintf(linebuf + lx, linebuflen - lx,
+				"%8.8X", ptr4[j]);
 			if (*argp > 1)
 				--*argp;
 			else if (*argp == 1) {
@@ -702,7 +665,7 @@
 		" %08X\n", r1, r2, r3);
 
 	KGSL_LOG_DUMP(device, "PAGETABLE SIZE: %08X ",
-		kgsl_mmu_get_ptsize(&device->mmu));
+		kgsl_mmu_get_ptsize());
 
 	kgsl_regread(device, MH_MMU_TRAN_ERROR, &r1);
 	KGSL_LOG_DUMP(device, "        TRAN_ERROR = %08X\n", r1);
@@ -740,7 +703,6 @@
 	mb();
 
 	if (device->pm_dump_enable) {
-
 		if (adreno_is_a2xx(adreno_dev))
 			adreno_dump_a2xx(device);
 		else if (adreno_is_a3xx(adreno_dev))
@@ -928,8 +890,7 @@
 			adreno_dump_regs(device, a3xx_registers,
 					a3xx_registers_count);
 
-			if (adreno_is_a330(adreno_dev) ||
-				adreno_is_a305b(adreno_dev))
+			if (adreno_is_a330(adreno_dev))
 				adreno_dump_regs(device, a330_registers,
 					a330_registers_count);
 		}
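
/*
 * Sketch: adreno_pm4_name() above and the removed adreno_pm3_nop_name()
 * both perform the same linear {id, name} table scan; generalized:
 */
static const char *example_pm_name(const struct pm_id_name *table,
	int count, uint32_t id)
{
	int i;

	for (i = 0; i < count; i++) {
		if (table[i].id == id)
			return table[i].name;
	}
	return "????????";
}
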
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index a4bb4fa..48367fa 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -18,7 +18,6 @@
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_cffdump.h"
-#include "kgsl_trace.h"
 
 #include "adreno.h"
 #include "adreno_pm4types.h"
@@ -320,7 +319,7 @@
 	return 0;
 }
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb)
+int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram)
 {
 	int status;
 	/*cp_rb_cntl_u cp_rb_cntl; */
@@ -332,6 +331,9 @@
 	if (rb->flags & KGSL_FLAGS_STARTED)
 		return 0;
 
+	if (init_ram)
+		rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0;
+
 	kgsl_sharedmem_set(&rb->memptrs_desc, 0, 0,
 			   sizeof(struct kgsl_rbmemptrs));
 
@@ -431,11 +433,8 @@
 		return status;
 
 	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
-	if (adreno_is_a305(adreno_dev) || adreno_is_a305c(adreno_dev) ||
-		adreno_is_a320(adreno_dev))
+	if (adreno_is_a305(adreno_dev) || adreno_is_a320(adreno_dev))
 		adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x000E0602);
-	else if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
-		adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x003E2008);
 
 	rb->rptr = 0;
 	rb->wptr = 0;
@@ -444,9 +443,7 @@
 	adreno_regwrite(device, REG_CP_ME_CNTL, 0);
 
 	/* ME init is GPU specific, so jump into the sub-function */
-	status = adreno_dev->gpudev->rb_init(adreno_dev, rb);
-	if (status)
-		return status;
+	adreno_dev->gpudev->rb_init(adreno_dev, rb);
 
 	/* idle device to validate ME INIT */
 	status = adreno_idle(device);
@@ -484,7 +481,6 @@
 	 */
 	rb->sizedwords = KGSL_RB_SIZE >> 2;
 
-	rb->buffer_desc.flags = KGSL_MEMFLAGS_GPUREADONLY;
 	/* allocate memory for ringbuffer */
 	status = kgsl_allocate_contiguous(&rb->buffer_desc,
 		(rb->sizedwords << 2));
@@ -568,8 +564,6 @@
 	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
 	/* 2 dwords to store the start of command sequence */
 	total_sizedwords += 2;
-	/* internal ib command identifier for the ringbuffer */
-	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
 
 	/* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
 	total_sizedwords += context ? 13 : 0;
@@ -581,18 +575,16 @@
 		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */
 
 	total_sizedwords += 2; /* scratchpad ts for fault tolerance */
-	total_sizedwords += 3; /* sop timestamp */
-	total_sizedwords += 4; /* eop timestamp */
-
 	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
 			!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		total_sizedwords += 3; /* sop timestamp */
+		total_sizedwords += 4; /* eop timestamp */
 		total_sizedwords += 3; /* global timestamp without cache
 					* flush for non-zero context */
+	} else {
+		total_sizedwords += 4; /* global timestamp for fault tolerance*/
 	}
 
-	if (adreno_is_a20x(adreno_dev))
-		total_sizedwords += 2; /* CACHE_FLUSH */
-
 	if (flags & KGSL_CMD_FLAGS_EOF)
 		total_sizedwords += 2;
 
@@ -611,36 +603,6 @@
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
 	GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
 
-	if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) {
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_INTERNAL_IDENTIFIER);
-	}
-
-	/* always increment the global timestamp. once. */
-	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
-
-	/* Do not update context's timestamp for internal submissions */
-	if (context && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
-		if (context_id == KGSL_MEMSTORE_GLOBAL)
-			rb->timestamp[context->id] =
-				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
-		else if (context->flags & CTXT_FLAGS_USER_GENERATED_TS)
-			rb->timestamp[context_id] = timestamp;
-		else
-			rb->timestamp[context_id]++;
-	}
-	timestamp = rb->timestamp[context_id];
-
-	/* scratchpad ts for fault tolerance */
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
-
-	/* start-of-pipeline timestamp */
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_MEM_WRITE, 2));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-		KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
 	if (flags & KGSL_CMD_FLAGS_PMODE) {
 		/* disable protected mode error checking */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -660,6 +622,21 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
 	}
 
+	/* always increment the global timestamp. once. */
+	rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
+
+	/* Do not update context's timestamp for internal submissions */
+	if (context && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		if (context_id == KGSL_MEMSTORE_GLOBAL)
+			rb->timestamp[context->id] =
+				rb->timestamp[KGSL_MEMSTORE_GLOBAL];
+		else if (context->flags & CTXT_FLAGS_USER_GENERATED_TS)
+			rb->timestamp[context_id] = timestamp;
+		else
+			rb->timestamp[context_id]++;
+	}
+	timestamp = rb->timestamp[context_id];
+
 	/* HW Workaround for MMU Page fault
 	* due to memory getting freed early before
 	* the GPU completes it.
@@ -670,10 +647,14 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
+	/* scratchpad ts for fault tolerance */
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
+	GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
+
 	if (adreno_is_a3xx(adreno_dev)) {
 		/*
 		 * Flush HLSQ lazy updates to make sure there are no
 		 * resources pending for indirect loads after the timestamp
 		 */
 
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -684,19 +665,22 @@
 		GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
 	}
 
-	/*
-	 * end-of-pipeline timestamp.  If per context timestamps is not
-	 * enabled, then context_id will be KGSL_MEMSTORE_GLOBAL so all
-	 * eop timestamps will work out.
-	 */
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
-		KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
-	GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
 	if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS
 			&& !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		/* start-of-pipeline timestamp */
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_MEM_WRITE, 2));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
+		/* end-of-pipeline timestamp */
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_EVENT_WRITE, 3));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
 
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_MEM_WRITE, 2));
@@ -705,14 +689,16 @@
 				eoptimestamp)));
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
 			rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
-	}
-
-	if (adreno_is_a20x(adreno_dev)) {
+	} else {
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_EVENT_WRITE, 1));
-		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH);
+			cp_type3_packet(CP_EVENT_WRITE, 3));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
+		GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+						eoptimestamp)));
+		GSL_RB_WRITE(ringcmds, rcmd_gpu,
+				rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
 	}
-
 	if (context) {
 		/* Conditional execution based on memory values */
 		GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -990,31 +976,43 @@
 {
 	struct kgsl_device *device = dev_priv->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	unsigned int *link = 0;
+	unsigned int *link;
 	unsigned int *cmds;
 	unsigned int i;
-	struct adreno_context *drawctxt = NULL;
+	struct adreno_context *drawctxt;
 	unsigned int start_index = 0;
-	int ret;
 
-	if (device->state & KGSL_STATE_HUNG) {
-		ret = -EBUSY;
-		goto done;
-	}
-
+	if (device->state & KGSL_STATE_HUNG)
+		return -EBUSY;
 	if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED) ||
-	      context == NULL || ibdesc == 0 || numibs == 0) {
-		ret = -EINVAL;
-		goto done;
-	}
+	      context == NULL || ibdesc == 0 || numibs == 0)
+		return -EINVAL;
+
 	drawctxt = context->devctxt;
 
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
 		KGSL_CTXT_ERR(device, "proc %s failed fault tolerance"
 			" will not accept commands for context %d\n",
 			drawctxt->pid_name, drawctxt->id);
-		ret = -EDEADLK;
-		goto done;
+		return -EDEADLK;
+	}
+
+	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+		KGSL_CTXT_ERR(device,
+			"proc %s triggered fault tolerance"
+			" skipping commands for context till EOF %d\n",
+			drawctxt->pid_name, drawctxt->id);
+		if (flags & KGSL_CMD_FLAGS_EOF)
+			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+		numibs = 0;
+	}
+
+	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
+				GFP_KERNEL);
+	if (!link) {
+		KGSL_CORE_ERR("kzalloc(%d) failed\n",
+			sizeof(unsigned int) * (numibs * 3 + 4));
+		return -ENOMEM;
 	}
 
 	/*When preamble is enabled, the preamble buffer with state restoration
@@ -1025,26 +1023,6 @@
 		adreno_dev->drawctxt_active == drawctxt)
 		start_index = 1;
 
-	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
-		KGSL_CTXT_ERR(device,
-			"proc %s triggered fault tolerance"
-			" skipping commands for context till EOF %d\n",
-			drawctxt->pid_name, drawctxt->id);
-		if (flags & KGSL_CMD_FLAGS_EOF)
-			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
-		if (start_index)
-			numibs = 1;
-		else
-			numibs = 0;
-	}
-
-	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
-				GFP_KERNEL);
-	if (!link) {
-		ret = -ENOMEM;
-		goto done;
-	}
-
 	if (!start_index) {
 		*cmds++ = cp_nop_packet(1);
 		*cmds++ = KGSL_START_OF_IB_IDENTIFIER;
@@ -1059,15 +1037,9 @@
 		if (unlikely(adreno_dev->ib_check_level >= 1 &&
 		    !_parse_ibs(dev_priv, ibdesc[i].gpuaddr,
 				ibdesc[i].sizedwords))) {
-			ret = -EINVAL;
-			goto done;
+			kfree(link);
+			return -EINVAL;
 		}
-
-		if (ibdesc[i].sizedwords == 0) {
-			ret = -EINVAL;
-			goto done;
-		}
-
 		*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
 		*cmds++ = ibdesc[i].gpuaddr;
 		*cmds++ = ibdesc[i].sizedwords;
@@ -1087,6 +1059,11 @@
 					(flags & KGSL_CMD_FLAGS_EOF),
 					&link[0], (cmds - link), *timestamp);
 
+	KGSL_CMD_INFO(device, "ctxt %d g %08x numibs %d ts %d\n",
+		context->id, (unsigned int)ibdesc, numibs, *timestamp);
+
+	kfree(link);
+
 #ifdef CONFIG_MSM_KGSL_CFF_DUMP
 	/*
 	 * insert wait for idle after every IB1
@@ -1102,16 +1079,9 @@
 	 */
 	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) {
 		drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT;
-		ret = -EPROTO;
+		return -EPROTO;
 	} else
-		ret = 0;
-
-done:
-	trace_kgsl_issueibcmds(device, context->id, ibdesc, numibs,
-		*timestamp, flags, ret, drawctxt->type);
-
-	kfree(link);
-	return ret;
+		return 0;
 }
 
 static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index e563ec7..fa03c05 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -97,7 +97,8 @@
 
 int adreno_ringbuffer_init(struct kgsl_device *device);
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb);
+int adreno_ringbuffer_start(struct adreno_ringbuffer *rb,
+				unsigned int init_ram);
 
 void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb);
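
/*
 * Sketch: the sop/eop timestamps written by the ringbuffer code land in
 * fixed memstore slots, so the CPU side can read one back directly.  The
 * helper name is illustrative and assumes the kgsl_sharedmem_readl()
 * accessor.
 */
static inline unsigned int example_read_eop_ts(struct kgsl_device *device,
					       unsigned int context_id)
{
	unsigned int ts;

	kgsl_sharedmem_readl(&device->memstore, &ts,
		KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp));
	return ts;
}
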
 
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index a76ed87..f23586e 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -162,12 +162,6 @@
 static unsigned int sp_fs_pvt_mem_addr;
 
 /*
- * Cached value of SP_VS_OBJ_START_REG and SP_FS_OBJ_START_REG.
- */
-static unsigned int sp_vs_obj_start_reg;
-static unsigned int sp_fs_obj_start_reg;
-
-/*
  * Each load state block has two possible types.  Each type has a different
  * number of dwords per unit.  Use this handy lookup table to make sure
  * we dump the right amount of data from the indirect buffer
@@ -379,26 +373,6 @@
 		sp_fs_pvt_mem_addr = 0;
 	}
 
-	if (sp_vs_obj_start_reg) {
-		ret = kgsl_snapshot_get_object(device, ptbase,
-			sp_vs_obj_start_reg & 0xFFFFFFE0, 0,
-			SNAPSHOT_GPU_OBJECT_GENERIC);
-		if (ret < 0)
-			return -EINVAL;
-		snapshot_frozen_objsize += ret;
-		sp_vs_obj_start_reg = 0;
-	}
-
-	if (sp_fs_obj_start_reg) {
-		ret = kgsl_snapshot_get_object(device, ptbase,
-			sp_fs_obj_start_reg & 0xFFFFFFE0, 0,
-			SNAPSHOT_GPU_OBJECT_GENERIC);
-		if (ret < 0)
-			return -EINVAL;
-		snapshot_frozen_objsize += ret;
-		sp_fs_obj_start_reg = 0;
-	}
-
 	/* Finally: VBOs */
 
 	/* The number of active VBOs is stored in VFD_CONTROL_O[31:27] */
@@ -470,7 +444,7 @@
 	int offset = type0_pkt_offset(*ptr);
 	int i;
 
-	for (i = 0; i < size - 1; i++, offset++) {
+	for (i = 0; i < size; i++, offset++) {
 
 		/* Visiblity stream buffer */
 
@@ -531,20 +505,11 @@
 			case A3XX_SP_FS_PVT_MEM_ADDR_REG:
 				sp_fs_pvt_mem_addr = ptr[i + 1];
 				break;
-			case A3XX_SP_VS_OBJ_START_REG:
-				sp_vs_obj_start_reg = ptr[i + 1];
-				break;
-			case A3XX_SP_FS_OBJ_START_REG:
-				sp_fs_obj_start_reg = ptr[i + 1];
-				break;
 			}
 		}
 	}
 }
 
-static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase,
-		unsigned int gpuaddr, unsigned int dwords);
-
 /* Add an IB as a GPU object, but first, parse it to find more goodies within */
 
 static int ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
@@ -584,12 +549,32 @@
 			if (adreno_cmd_is_ib(src[i])) {
 				unsigned int gpuaddr = src[i + 1];
 				unsigned int size = src[i + 2];
+				unsigned int ibbase;
 
-				ret = parse_ib(device, ptbase, gpuaddr, size);
+				/* Address of the last processed IB2 */
+				kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
 
-				/* If adding the IB failed then stop parsing */
-				if (ret < 0)
-					goto done;
+				/*
+				 * If this is the last IB2 that was executed,
+				 * then push it to make sure it goes into the
+				 * static space
+				 */
+
+				if (ibbase == gpuaddr)
+					push_object(device,
+						SNAPSHOT_OBJ_TYPE_IB, ptbase,
+						gpuaddr, size);
+				else {
+					ret = ib_add_gpu_object(device,
+						ptbase, gpuaddr, size);
+
+					/*
+					 * If adding the IB failed then stop
+					 * parsing
+					 */
+					if (ret < 0)
+						goto done;
+				}
 			} else {
 				ret = ib_parse_type3(device, &src[i], ptbase);
 				/*
@@ -619,34 +604,29 @@
 	return ret;
 }
 
-/*
- * We want to store the last executed IB1 and IB2 in the static region to ensure
- * that we get at least some information out of the snapshot even if we can't
- * access the dynamic data from the sysfs file.  Push all other IBs on the
- * dynamic list
- */
-static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase,
-		unsigned int gpuaddr, unsigned int dwords)
+/* Snapshot the istore memory */
+static int snapshot_istore(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
 {
-	unsigned int ib1base, ib2base;
-	int ret = 0;
+	struct kgsl_snapshot_istore *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int count, i;
 
-	/*
-	 * Check the IB address - if it is either the last executed IB1 or the
-	 * last executed IB2 then push it into the static blob otherwise put
-	 * it in the dynamic list
-	 */
+	count = adreno_dev->istore_size * adreno_dev->instruction_size;
 
-	kgsl_regread(device, REG_CP_IB1_BASE, &ib1base);
-	kgsl_regread(device, REG_CP_IB2_BASE, &ib2base);
+	if (remain < (count * 4) + sizeof(*header)) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Not enough memory for the istore section");
+		return 0;
+	}
 
-	if (gpuaddr == ib1base || gpuaddr == ib2base)
-		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
-			gpuaddr, dwords);
-	else
-		ret = ib_add_gpu_object(device, ptbase, gpuaddr, dwords);
+	header->count = adreno_dev->istore_size;
 
-	return ret;
+	for (i = 0; i < count; i++)
+		kgsl_regread(device, ADRENO_ISTORE_START + i, &data[i]);
+
+	return (count * 4) + sizeof(*header);
 }
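
/*
 * Sketch of the contract every snapshot section callback in this file
 * follows (snapshot_istore above, snapshot_rb and snapshot_ib below):
 * check 'remain', fill a header, emit payload dwords, and return the
 * bytes consumed, or 0 so kgsl_snapshot_add_section() skips the section.
 */
static int example_section(struct kgsl_device *device, void *snapshot,
	int remain, void *priv)
{
	struct kgsl_snapshot_istore *header = snapshot;
	unsigned int *data = snapshot + sizeof(*header);
	int count = 4;		/* illustrative payload size, in dwords */
	int i;

	if (remain < (count * 4) + sizeof(*header))
		return 0;

	header->count = count;
	for (i = 0; i < count; i++)
		data[i] = i;	/* a real callback reads HW or memory here */

	return (count * 4) + sizeof(*header);
}
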
 
 /* Snapshot the ringbuffer memory */
@@ -799,11 +779,12 @@
 			 * others get marked at GPU objects
 			 */
 
-			if (memdesc != NULL)
+			if (ibaddr == ibbase || memdesc != NULL)
 				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
 					ptbase, ibaddr, ibsize);
 			else
-				parse_ib(device, ptbase, ibaddr, ibsize);
+				ib_add_gpu_object(device, ptbase, ibaddr,
+					ibsize);
 		}
 
 		index = index + 1;
@@ -818,64 +799,6 @@
 	return size + sizeof(*header);
 }
 
-static int snapshot_capture_mem_list(struct kgsl_device *device, void *snapshot,
-			int remain, void *priv)
-{
-	struct kgsl_snapshot_replay_mem_list *header = snapshot;
-	struct kgsl_process_private *private;
-	unsigned int ptbase;
-	struct rb_node *node;
-	struct kgsl_mem_entry *entry = NULL;
-	int num_mem;
-	unsigned int *data = snapshot + sizeof(*header);
-
-	ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
-	mutex_lock(&kgsl_driver.process_mutex);
-	list_for_each_entry(private, &kgsl_driver.process_list, list) {
-		if (kgsl_mmu_pt_equal(&device->mmu, private->pagetable,
-			ptbase))
-			break;
-	}
-	mutex_unlock(&kgsl_driver.process_mutex);
-	if (!private) {
-		KGSL_DRV_ERR(device,
-		"Failed to get pointer to process private structure\n");
-		return 0;
-	}
-	/* We need to know the number of memory objects that the process has */
-	spin_lock(&private->mem_lock);
-	for (node = rb_first(&private->mem_rb), num_mem = 0; node; ) {
-		entry = rb_entry(node, struct kgsl_mem_entry, node);
-		node = rb_next(&entry->node);
-		num_mem++;
-	}
-
-	if (remain < ((num_mem * 3 * sizeof(unsigned int)) +
-			sizeof(*header))) {
-		KGSL_DRV_ERR(device,
-			"snapshot: Not enough memory for the mem list section");
-		spin_unlock(&private->mem_lock);
-		return 0;
-	}
-	header->num_entries = num_mem;
-	header->ptbase = ptbase;
-	/*
-	 * Walk throught the memory list and store the
-	 * tuples(gpuaddr, size, memtype) in snapshot
-	 */
-	for (node = rb_first(&private->mem_rb); node; ) {
-		entry = rb_entry(node, struct kgsl_mem_entry, node);
-		node = rb_next(&entry->node);
-
-		*data++ = entry->memdesc.gpuaddr;
-		*data++ = entry->memdesc.size;
-		*data++ = (entry->memdesc.priv & KGSL_MEMTYPE_MASK) >>
-							KGSL_MEMTYPE_SHIFT;
-	}
-	spin_unlock(&private->mem_lock);
-	return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
-}
-
 /* Snapshot the memory for an indirect buffer */
 static int snapshot_ib(struct kgsl_device *device, void *snapshot,
 	int remain, void *priv)
@@ -906,14 +829,15 @@
 				continue;
 
 			if (adreno_cmd_is_ib(*src))
-				ret = parse_ib(device, obj->ptbase, src[1],
-					src[2]);
-			else
+				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
+					obj->ptbase, src[1], src[2]);
+			else {
 				ret = ib_parse_type3(device, src, obj->ptbase);
 
-			/* Stop parsing if the type3 decode fails */
-			if (ret < 0)
-				break;
+				/* Stop parsing if the type3 decode fails */
+				if (ret < 0)
+					break;
+			}
 		}
 	}
 
@@ -979,13 +903,6 @@
 		snapshot, remain, snapshot_rb, NULL);
 
 	/*
-	 * Add a section that lists (gpuaddr, size, memtype) tuples of the
-	 * hanging process
-	 */
-	snapshot = kgsl_snapshot_add_section(device,
-			KGSL_SNAPSHOT_SECTION_MEMLIST, snapshot, remain,
-			snapshot_capture_mem_list, NULL);
-	/*
 	 * Make sure that the last IB1 that was being executed is dumped.
 	 * Since this was the last IB1 that was processed, we should have
 	 * already added it to the list during the ringbuffer parse but we
@@ -1033,6 +950,17 @@
 	for (i = 0; i < objbufptr; i++)
 		snapshot = dump_object(device, i, snapshot, remain);
 
+	/*
+	 * Only dump the istore on a hang - reading it on a running system
+ * has a non-zero chance of hanging the GPU
+	 */
+
+	if (hang) {
+		snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_ISTORE, snapshot, remain,
+			snapshot_istore, NULL);
+	}
+
 	/* Add GPU specific sections - registers mainly, but other stuff too */
 	if (adreno_dev->gpudev->snapshot)
 		snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 53ef392..324dc5c 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -18,7 +18,7 @@
 #include <linux/uaccess.h>
 #include <linux/interrupt.h>
 #include <linux/workqueue.h>
-
+#include <linux/android_pmem.h>
 #include <linux/vmalloc.h>
 #include <linux/pm_runtime.h>
 #include <linux/genlock.h>
@@ -28,7 +28,6 @@
 #include <linux/msm_ion.h>
 #include <linux/io.h>
 #include <mach/socinfo.h>
-#include <linux/mman.h>
 
 #include "kgsl.h"
 #include "kgsl_debugfs.h"
@@ -53,52 +52,6 @@
 
 static struct ion_client *kgsl_ion_client;
 
-int kgsl_memfree_hist_init(void)
-{
-	void *base;
-
-	base = kzalloc(KGSL_MEMFREE_HIST_SIZE, GFP_KERNEL);
-	kgsl_driver.memfree_hist.base_hist_rb = base;
-	if (base == NULL)
-		return -ENOMEM;
-	kgsl_driver.memfree_hist.size = KGSL_MEMFREE_HIST_SIZE;
-	kgsl_driver.memfree_hist.wptr = base;
-	return 0;
-}
-
-void kgsl_memfree_hist_exit(void)
-{
-	kfree(kgsl_driver.memfree_hist.base_hist_rb);
-	kgsl_driver.memfree_hist.base_hist_rb = NULL;
-}
-
-void kgsl_memfree_hist_set_event(unsigned int pid, unsigned int gpuaddr,
-			unsigned int size, int flags)
-{
-	struct kgsl_memfree_hist_elem *p;
-
-	void *base = kgsl_driver.memfree_hist.base_hist_rb;
-	int rbsize = kgsl_driver.memfree_hist.size;
-
-	if (base == NULL)
-		return;
-
-	mutex_lock(&kgsl_driver.memfree_hist_mutex);
-	p = kgsl_driver.memfree_hist.wptr;
-	p->pid = pid;
-	p->gpuaddr = gpuaddr;
-	p->size = size;
-	p->flags = flags;
-
-	kgsl_driver.memfree_hist.wptr++;
-	if ((void *)kgsl_driver.memfree_hist.wptr >= base+rbsize) {
-		kgsl_driver.memfree_hist.wptr =
-			(struct kgsl_memfree_hist_elem *)base;
-	}
-	mutex_unlock(&kgsl_driver.memfree_hist_mutex);
-}
-
-
 /* kgsl_get_mem_entry - get the mem_entry structure for the specified object
  * @device - Pointer to the device structure
  * @ptbase - the pagetable base of the object
@@ -183,19 +136,9 @@
 }
 EXPORT_SYMBOL(kgsl_mem_entry_destroy);
 
-/**
- * kgsl_mem_entry_track_gpuaddr - Insert a mem_entry in the address tree
- * @process: the process that owns the memory
- * @entry: the memory entry
- *
- * Insert a kgsl_mem_entry in to the rb_tree for searching by GPU address.
- * Not all mem_entries will have gpu addresses when first created, so this
- * function may be called after creation when the GPU address is finally
- * assigned.
- */
-static void
-kgsl_mem_entry_track_gpuaddr(struct kgsl_process_private *process,
-				struct kgsl_mem_entry *entry)
+static
+void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry,
+				   struct kgsl_process_private *process)
 {
 	struct rb_node **node;
 	struct rb_node *parent = NULL;
@@ -220,48 +163,8 @@
 	rb_insert_color(&entry->node, &process->mem_rb);
 
 	spin_unlock(&process->mem_lock);
-}
 
-/**
- * kgsl_mem_entry_attach_process - Attach a mem_entry to its owner process
- * @entry: the memory entry
- * @process: the owner process
- *
- * Attach a newly created mem_entry to its owner process so that
- * it can be found later. The mem_entry will be added to mem_idr and have
- * its 'id' field assigned. If the GPU address has been set, the entry
- * will also be added to the mem_rb tree.
- *
- * @returns - 0 on success or error code on failure.
- */
-static int
-kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry,
-				   struct kgsl_process_private *process)
-{
-	int ret;
-
-	while (1) {
-		if (idr_pre_get(&process->mem_idr, GFP_KERNEL) == 0) {
-			ret = -ENOMEM;
-			goto err;
-		}
-
-		spin_lock(&process->mem_lock);
-		ret = idr_get_new_above(&process->mem_idr, entry, 1,
-					&entry->id);
-		spin_unlock(&process->mem_lock);
-
-		if (ret == 0)
-			break;
-		else if (ret != -EAGAIN)
-			goto err;
-	}
 	entry->priv = process;
-
-	if (entry->memdesc.gpuaddr != 0)
-		kgsl_mem_entry_track_gpuaddr(process, entry);
-err:
-	return ret;
 }
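
The attach path removed here relies on the legacy two-step idr API of this tree's vintage: idr_pre_get() preloads memory outside the lock, and idr_get_new_above() can still return -EAGAIN under contention, hence the retry loop. A kernel-style sketch of the pattern (pre-3.9 API; compiles only in a kernel tree of this era):

/*
 * Kernel-style sketch of the legacy idr allocation loop seen in the
 * removed kgsl_mem_entry_attach_process(). Names are illustrative.
 */
#include <linux/idr.h>
#include <linux/spinlock.h>

static int demo_idr_alloc(struct idr *idr, void *ptr, spinlock_t *lock,
			  int *id)
{
	int ret;

	do {
		/* Preallocate outside the lock; may sleep */
		if (idr_pre_get(idr, GFP_KERNEL) == 0)
			return -ENOMEM;

		spin_lock(lock);
		/* Ids start at 1 so 0 can mean "not attached" */
		ret = idr_get_new_above(idr, ptr, 1, id);
		spin_unlock(lock);
	} while (ret == -EAGAIN);

	return ret;
}
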
 
 /* Detach a memory entry from a process and unmap it from the MMU */
@@ -271,17 +174,6 @@
 	if (entry == NULL)
 		return;
 
-	spin_lock(&entry->priv->mem_lock);
-
-	if (entry->id != 0)
-		idr_remove(&entry->priv->mem_idr, entry->id);
-	entry->id = 0;
-
-	if (entry->memdesc.gpuaddr != 0)
-		rb_erase(&entry->node, &entry->priv->mem_rb);
-
-	spin_unlock(&entry->priv->mem_lock);
-
 	entry->priv->stats[entry->memtype].cur -= entry->memdesc.size;
 	entry->priv = NULL;
 
@@ -300,19 +192,14 @@
 
 	context = kzalloc(sizeof(*context), GFP_KERNEL);
 
-	if (context == NULL) {
-		KGSL_DRV_INFO(dev_priv->device, "kzalloc(%d) failed\n",
-				sizeof(*context));
-		return ERR_PTR(-ENOMEM);
-	}
+	if (context == NULL)
+		return NULL;
 
 	while (1) {
 		if (idr_pre_get(&dev_priv->device->context_idr,
 				GFP_KERNEL) == 0) {
-			KGSL_DRV_INFO(dev_priv->device,
-					"idr_pre_get: ENOMEM\n");
-			ret = -ENOMEM;
-			goto func_end;
+			kfree(context);
+			return NULL;
 		}
 
 		ret = idr_get_new_above(&dev_priv->device->context_idr,
@@ -322,25 +209,26 @@
 			break;
 	}
 
-	if (ret)
-		goto func_end;
+	if (ret) {
+		kfree(context);
+		return NULL;
+	}
 
 	/* MAX - 1, there is one memdesc in memstore for device info */
 	if (id >= KGSL_MEMSTORE_MAX) {
-		KGSL_DRV_INFO(dev_priv->device, "cannot have more than %d "
+		KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d "
 				"ctxts due to memstore limitation\n",
 				KGSL_MEMSTORE_MAX);
 		idr_remove(&dev_priv->device->context_idr, id);
-		ret = -ENOSPC;
-		goto func_end;
+		kfree(context);
+		return NULL;
 	}
 
 	kref_init(&context->refcount);
 	context->id = id;
 	context->dev_priv = dev_priv;
 
-	ret = kgsl_sync_timeline_create(context);
-	if (ret) {
+	if (kgsl_sync_timeline_create(context)) {
 		idr_remove(&dev_priv->device->context_idr, id);
-		goto func_end;
+		kfree(context);
+		return NULL;
 	}
@@ -362,7 +250,7 @@
 func_end:
 	if (ret) {
 		kfree(context);
-		return ERR_PTR(ret);
+		return NULL;
 	}
 
 	return context;
@@ -467,6 +355,24 @@
 	return ret;
 }
 
+int kgsl_register_ts_notifier(struct kgsl_device *device,
+			      struct notifier_block *nb)
+{
+	BUG_ON(device == NULL);
+	return atomic_notifier_chain_register(&device->ts_notifier_list,
+					      nb);
+}
+EXPORT_SYMBOL(kgsl_register_ts_notifier);
+
+int kgsl_unregister_ts_notifier(struct kgsl_device *device,
+				struct notifier_block *nb)
+{
+	BUG_ON(device == NULL);
+	return atomic_notifier_chain_unregister(&device->ts_notifier_list,
+						nb);
+}
+EXPORT_SYMBOL(kgsl_unregister_ts_notifier);
+
 int kgsl_check_timestamp(struct kgsl_device *device,
 	struct kgsl_context *context, unsigned int timestamp)
 {
@@ -517,7 +423,7 @@
 			INIT_COMPLETION(device->hwaccess_gate);
 			device->ftbl->suspend_context(device);
 			device->ftbl->stop(device);
-			pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+			pm_qos_update_request(&device->pm_qos_req_dma,
 						PM_QOS_DEFAULT_VALUE);
 			kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
 			break;
@@ -666,15 +572,12 @@
 	private->pid = task_tgid_nr(current);
 	private->mem_rb = RB_ROOT;
 
-	idr_init(&private->mem_idr);
-
 	if (kgsl_mmu_enabled())
 	{
 		unsigned long pt_name;
-		struct kgsl_mmu *mmu = &cur_dev_priv->device->mmu;
 
 		pt_name = task_tgid_nr(current);
-		private->pagetable = kgsl_mmu_getpagetable(mmu, pt_name);
+		private->pagetable = kgsl_mmu_getpagetable(pt_name);
 		if (private->pagetable == NULL) {
 			kfree(private);
 			private = NULL;
@@ -697,7 +600,7 @@
 			 struct kgsl_process_private *private)
 {
 	struct kgsl_mem_entry *entry = NULL;
-	int next = 0;
+	struct rb_node *node;
 
 	if (!private)
 		return;
@@ -712,22 +615,14 @@
 
 	list_del(&private->list);
 
-	while (1) {
-		rcu_read_lock();
-		entry = idr_get_next(&private->mem_idr, &next);
-		rcu_read_unlock();
-		if (entry == NULL)
-			break;
+	for (node = rb_first(&private->mem_rb); node; ) {
+		entry = rb_entry(node, struct kgsl_mem_entry, node);
+		node = rb_next(&entry->node);
+
+		rb_erase(&entry->node, &private->mem_rb);
 		kgsl_mem_entry_detach_process(entry);
-		/*
-		 * Always start back at the beginning, to
-		 * ensure all entries are removed,
-		 * like list_for_each_entry_safe.
-		 */
-		next = 0;
 	}
 	kgsl_mmu_putpagetable(private->pagetable);
-	idr_destroy(&private->mem_idr);
 	kfree(private);
 unlock:
 	mutex_unlock(&kgsl_driver.process_mutex);
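
The restored teardown loop above uses the standard erase-while-walking idiom: fetch rb_next() before rb_erase() unlinks the current node, so the iterator survives the removal. A kernel-style sketch with a hypothetical item type:

/*
 * Kernel-style sketch of safe rb-tree teardown: advance the iterator
 * before erasing. "struct demo_item" is illustrative.
 */
#include <linux/rbtree.h>
#include <linux/slab.h>

struct demo_item {
	struct rb_node node;
	/* payload ... */
};

static void demo_rb_teardown(struct rb_root *root)
{
	struct rb_node *node;
	struct demo_item *item;

	for (node = rb_first(root); node; ) {
		item = rb_entry(node, struct demo_item, node);
		/* Grab the successor first; erase unlinks the current node */
		node = rb_next(node);
		rb_erase(&item->node, root);
		kfree(item);
	}
}
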
@@ -815,6 +710,13 @@
 	dev_priv->device = device;
 	filep->private_data = dev_priv;
 
+	/* Get file (per process) private struct */
+	dev_priv->process_priv = kgsl_get_process_private(dev_priv);
+	if (dev_priv->process_priv == NULL) {
+		result = -ENOMEM;
+		goto err_freedevpriv;
+	}
+
 	mutex_lock(&device->mutex);
 	kgsl_check_suspended(device);
 
@@ -822,45 +724,26 @@
 		kgsl_sharedmem_set(&device->memstore, 0, 0,
 				device->memstore.size);
 
-		result = device->ftbl->init(device);
-		if (result)
-			goto err_freedevpriv;
+		result = device->ftbl->start(device, true);
 
-		result = device->ftbl->start(device);
-		if (result)
-			goto err_freedevpriv;
-
+		if (result) {
+			mutex_unlock(&device->mutex);
+			goto err_putprocess;
+		}
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 	}
 	device->open_count++;
 	mutex_unlock(&device->mutex);
 
-	/*
-	 * Get file (per process) private struct. This must be done
-	 * after the first start so that the global pagetable mappings
-	 * are set up before we create the per-process pagetable.
-	 */
-	dev_priv->process_priv = kgsl_get_process_private(dev_priv);
-	if (dev_priv->process_priv ==  NULL) {
-		result = -ENOMEM;
-		goto err_stop;
-	}
-
 	KGSL_DRV_INFO(device, "Initialized %s: mmu=%s pagetable_count=%d\n",
 		device->name, kgsl_mmu_enabled() ? "on" : "off",
 		kgsl_pagetable_count);
 
 	return result;
 
-err_stop:
-	mutex_lock(&device->mutex);
-	device->open_count--;
-	if (device->open_count == 0) {
-		result = device->ftbl->stop(device);
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
-	}
+err_putprocess:
+	kgsl_put_process_private(device, dev_priv->process_priv);
 err_freedevpriv:
-	mutex_unlock(&device->mutex);
 	filep->private_data = NULL;
 	kfree(dev_priv);
 err_pmruntime:
@@ -875,7 +758,7 @@
 {
 	struct rb_node *node = private->mem_rb.rb_node;
 
-	if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
+	if (!kgsl_mmu_gpuaddr_in_range(gpuaddr))
 		return NULL;
 
 	while (node != NULL) {
@@ -908,77 +791,6 @@
 	return kgsl_sharedmem_find_region(private, gpuaddr, 1);
 }
 
-/**
- * kgsl_sharedmem_region_empty - Check if an address region is empty
- *
- * @private: private data for the process to check.
- * @gpuaddr: start address of the region
- * @size: length of the region.
- *
- * Checks that there are no existing allocations within an address
- * region. Note that unlike other kgsl_sharedmem* search functions,
- * this one manages locking on its own.
- */
-int
-kgsl_sharedmem_region_empty(struct kgsl_process_private *private,
-	unsigned int gpuaddr, size_t size)
-{
-	int result = 1;
-	unsigned int gpuaddr_end = gpuaddr + size;
-
-	struct rb_node *node = private->mem_rb.rb_node;
-
-	if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
-		return 0;
-
-	/* don't overflow */
-	if (gpuaddr_end < gpuaddr)
-		return 0;
-
-	spin_lock(&private->mem_lock);
-	node = private->mem_rb.rb_node;
-	while (node != NULL) {
-		struct kgsl_mem_entry *entry;
-		unsigned int memdesc_start, memdesc_end;
-
-		entry = rb_entry(node, struct kgsl_mem_entry, node);
-
-		memdesc_start = entry->memdesc.gpuaddr;
-		memdesc_end = memdesc_start
-				+ kgsl_memdesc_mmapsize(&entry->memdesc);
-
-		if (gpuaddr_end <= memdesc_start)
-			node = node->rb_left;
-		else if (memdesc_end <= gpuaddr)
-			node = node->rb_right;
-		else {
-			result = 0;
-			break;
-		}
-	}
-	spin_unlock(&private->mem_lock);
-	return result;
-}
-
-/**
- * kgsl_sharedmem_find_id - find a memory entry by id
- * @process: the owning process
- * @id: id to find
- *
- * @returns - the mem_entry or NULL
- */
-static inline struct kgsl_mem_entry *
-kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id)
-{
-	struct kgsl_mem_entry *entry;
-
-	rcu_read_lock();
-	entry = idr_find(&process->mem_idr, id);
-	rcu_read_unlock();
-
-	return entry;
-}
-
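
The removed kgsl_sharedmem_region_empty() first guards against address wrap-around, then rejects any overlap with an existing allocation. A standalone sketch of the same logic, with a flat array standing in for the rb-tree:

/*
 * Standalone sketch of the overlap test at the heart of the removed
 * region-empty check: reject wrapped ranges, then report a collision
 * unless one range ends before the other starts.
 */
#include <stdio.h>

struct range {
	unsigned int start;
	unsigned int size;
};

static int region_empty(const struct range *r, int n,
			unsigned int gpuaddr, unsigned int size)
{
	unsigned int end = gpuaddr + size;
	int i;

	/* Reject a range that wraps past the top of the address space */
	if (end < gpuaddr)
		return 0;

	for (i = 0; i < n; i++) {
		unsigned int rstart = r[i].start;
		unsigned int rend = rstart + r[i].size;

		if (!(end <= rstart || rend <= gpuaddr))
			return 0;
	}
	return 1;
}

int main(void)
{
	struct range allocs[] = { { 0x1000, 0x1000 }, { 0x4000, 0x2000 } };

	printf("%d\n", region_empty(allocs, 2, 0x2000, 0x1000)); /* 1 */
	printf("%d\n", region_empty(allocs, 2, 0x4800, 0x100));  /* 0 */
	return 0;
}
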
 /*call all ioctl sub functions with driver locked*/
 static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
 					  unsigned int cmd, void *data)
@@ -1133,7 +945,6 @@
 				      unsigned int cmd, void *data)
 {
 	int result = 0;
-	int i = 0;
 	struct kgsl_ringbuffer_issueibcmds *param = data;
 	struct kgsl_ibdesc *ibdesc;
 	struct kgsl_context *context;
@@ -1145,7 +956,13 @@
 	}
 
 	if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) {
+		KGSL_DRV_INFO(dev_priv->device,
+			"Using IB list mode for ib submission, numibs: %d\n",
+			param->numibs);
 		if (!param->numibs) {
+			KGSL_DRV_ERR(dev_priv->device,
+				"Invalid numibs as parameter: %d\n",
+				 param->numibs);
 			result = -EINVAL;
 			goto done;
 		}
@@ -1156,6 +973,9 @@
 		 */
 
 		if (param->numibs > 10000) {
+			KGSL_DRV_ERR(dev_priv->device,
+				"Too many IBs submitted. count: %d max 10000\n",
+				param->numibs);
 			result = -EINVAL;
 			goto done;
 		}
@@ -1195,18 +1015,6 @@
 		param->numibs = 1;
 	}
 
-	for (i = 0; i < param->numibs; i++) {
-		struct kgsl_pagetable *pt = dev_priv->process_priv->pagetable;
-
-		if (!kgsl_mmu_gpuaddr_in_range(pt, ibdesc[i].gpuaddr)) {
-			result = -ERANGE;
-			KGSL_DRV_ERR(dev_priv->device,
-				     "invalid ib base GPU virtual addr %x\n",
-				     ibdesc[i].gpuaddr);
-			goto free_ibdesc;
-		}
-	}
-
 	result = dev_priv->device->ftbl->issueibcmds(dev_priv,
 					     context,
 					     ibdesc,
@@ -1214,6 +1022,8 @@
 					     &param->timestamp,
 					     param->flags);
 
+	trace_kgsl_issueibcmds(dev_priv->device, param, ibdesc, result);
+
 free_ibdesc:
 	kfree(ibdesc);
 done:
@@ -1255,6 +1065,7 @@
 	if (context == NULL)
 		return -EINVAL;
 
 	return _cmdstream_readtimestamp(dev_priv, context,
 			param->type, &param->timestamp);
 }
@@ -1263,6 +1074,9 @@
 	void *priv, u32 id, u32 timestamp)
 {
 	struct kgsl_mem_entry *entry = priv;
+	spin_lock(&entry->priv->mem_lock);
+	rb_erase(&entry->node, &entry->priv->mem_rb);
+	spin_unlock(&entry->priv->mem_lock);
 	trace_kgsl_mem_timestamp_free(device, entry, id, timestamp, 0);
 	kgsl_mem_entry_detach_process(entry);
 }
@@ -1315,8 +1129,11 @@
 	struct kgsl_context *context;
 
 	context = kgsl_find_context(dev_priv, param->context_id);
-	if (context == NULL)
+	if (context == NULL) {
+		KGSL_DRV_ERR(dev_priv->device,
+			"invalid drawctxt context_id %d\n", param->context_id);
 		return -EINVAL;
+	}
 
 	return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
 			context, param->timestamp, param->type);
@@ -1331,22 +1148,22 @@
 
 	context = kgsl_create_context(dev_priv);
 
-	if (IS_ERR(context)) {
-		result = PTR_ERR(context);
+	if (context == NULL) {
+		result = -ENOMEM;
 		goto done;
 	}
 
 	if (dev_priv->device->ftbl->drawctxt_create) {
 		result = dev_priv->device->ftbl->drawctxt_create(
 			dev_priv->device, dev_priv->process_priv->pagetable,
-			context, &param->flags);
+			context, param->flags);
 		if (result)
 			goto done;
 	}
 	trace_kgsl_context_create(dev_priv->device, context, param->flags);
 	param->drawctxt_id = context->id;
 done:
-	if (result && !IS_ERR(context))
+	if (result && context)
 		kgsl_context_detach(context);
 
 	return result;
@@ -1374,52 +1191,27 @@
 static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv,
 					unsigned int cmd, void *data)
 {
+	int result = 0;
 	struct kgsl_sharedmem_free *param = data;
 	struct kgsl_process_private *private = dev_priv->process_priv;
 	struct kgsl_mem_entry *entry = NULL;
 
 	spin_lock(&private->mem_lock);
 	entry = kgsl_sharedmem_find(private, param->gpuaddr);
+	if (entry)
+		rb_erase(&entry->node, &private->mem_rb);
+
 	spin_unlock(&private->mem_lock);
 
-	if (!entry) {
-		KGSL_MEM_INFO(dev_priv->device, "invalid gpuaddr %08x\n",
-				param->gpuaddr);
-		return -EINVAL;
+	if (entry) {
+		trace_kgsl_mem_free(entry);
+		kgsl_mem_entry_detach_process(entry);
+	} else {
+		KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
+		result = -EINVAL;
 	}
-	trace_kgsl_mem_free(entry);
 
-	kgsl_memfree_hist_set_event(entry->priv->pid,
-				    entry->memdesc.gpuaddr,
-				    entry->memdesc.size,
-				    entry->memdesc.flags);
-
-	kgsl_mem_entry_detach_process(entry);
-	return 0;
-}
-
-static long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv,
-					unsigned int cmd, void *data)
-{
-	struct kgsl_gpumem_free_id *param = data;
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_mem_entry *entry = NULL;
-
-	entry = kgsl_sharedmem_find_id(private, param->id);
-
-	if (!entry) {
-		KGSL_MEM_INFO(dev_priv->device, "invalid id %d\n", param->id);
-		return -EINVAL;
-	}
-	trace_kgsl_mem_free(entry);
-
-	kgsl_memfree_hist_set_event(entry->priv->pid,
-				    entry->memdesc.gpuaddr,
-				    entry->memdesc.size,
-				    entry->memdesc.flags);
-
-	kgsl_mem_entry_detach_process(entry);
-	return 0;
+	return result;
 }
 
 static struct vm_area_struct *kgsl_get_vma_from_start_addr(unsigned int addr)
@@ -1450,10 +1242,11 @@
 	dev_t rdev;
 	struct fb_info *info;
 
-	*start = 0;
-	*vstart = 0;
-	*len = 0;
 	*filep = NULL;
+#ifdef CONFIG_ANDROID_PMEM
+	if (!get_pmem_file(fd, start, vstart, len, filep))
+		return 0;
+#endif
 
 	fbfile = fget(fd);
 	if (fbfile == NULL) {
@@ -1494,8 +1287,10 @@
 
 	ret = -ERANGE;
 
-	if (phys == 0)
+	if (phys == 0) {
+		KGSL_CORE_ERR("kgsl_get_phys_file returned phys=0\n");
 		goto err;
+	}
 
 	/* Make sure the length of the region, the offset and the desired
 	 * size are all page aligned or bail
@@ -1503,13 +1298,19 @@
 	if ((len & ~PAGE_MASK) ||
 		(offset & ~PAGE_MASK) ||
 		(size & ~PAGE_MASK)) {
-		KGSL_CORE_ERR("length offset or size is not page aligned\n");
+		KGSL_CORE_ERR("length %lu, offset %u or size %u "
+				"is not page aligned\n",
+				len, offset, size);
 		goto err;
 	}
 
 	/* The size or offset can never be greater than the PMEM length */
-	if (offset >= len || size > len)
+	if (offset >= len || size > len) {
+		KGSL_CORE_ERR("offset %u or size %u "
+				"exceeds pmem length %lu\n",
+				offset, size, len);
 		goto err;
+	}
 
 	/* If size is 0, then adjust it to default to the size of the region
 	 * minus the offset.  If size isn't zero, then make sure that it will
@@ -1527,8 +1328,6 @@
 	entry->memdesc.size = size;
 	entry->memdesc.physaddr = phys + offset;
 	entry->memdesc.hostptr = (void *) (virt + offset);
-	/* USE_CPU_MAP is not implemented for PMEM. */
-	entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	ret = memdesc_sg_phys(&entry->memdesc, phys + offset, size);
 	if (ret)
@@ -1536,14 +1335,18 @@
 
 	return 0;
 err:
+#ifdef CONFIG_ANDROID_PMEM
+	put_pmem_file(filep);
+#endif
 	return ret;
 }
 
 static int memdesc_sg_virt(struct kgsl_memdesc *memdesc,
-	unsigned long paddr, int size)
+	void *addr, int size)
 {
 	int i;
 	int sglen = PAGE_ALIGN(size) / PAGE_SIZE;
+	unsigned long paddr = (unsigned long) addr;
 
 	memdesc->sg = kgsl_sg_alloc(sglen);
 
@@ -1594,33 +1397,34 @@
 	return -EINVAL;
 }
 
-static int kgsl_setup_useraddr(struct kgsl_mem_entry *entry,
+static int kgsl_setup_hostptr(struct kgsl_mem_entry *entry,
 			      struct kgsl_pagetable *pagetable,
-			      unsigned long useraddr, unsigned int offset,
+			      void *hostptr, unsigned int offset,
 			      size_t size)
 {
 	struct vm_area_struct *vma;
 	unsigned int len;
 
 	down_read(&current->mm->mmap_sem);
-	vma = find_vma(current->mm, useraddr);
+	vma = find_vma(current->mm, (unsigned int) hostptr);
 	up_read(&current->mm->mmap_sem);
 
 	if (!vma) {
-		KGSL_CORE_ERR("find_vma(%lx) failed\n", useraddr);
+		KGSL_CORE_ERR("find_vma(%p) failed\n", hostptr);
 		return -EINVAL;
 	}
 
 	/* We don't necessarily start at vma->vm_start */
-	len = vma->vm_end - useraddr;
+	len = vma->vm_end - (unsigned long) hostptr;
 
 	if (offset >= len)
 		return -EINVAL;
 
-	if (!KGSL_IS_PAGE_ALIGNED(useraddr) ||
+	if (!KGSL_IS_PAGE_ALIGNED((unsigned long) hostptr) ||
 	    !KGSL_IS_PAGE_ALIGNED(len)) {
-		KGSL_CORE_ERR("bad alignment: start(%lx) len(%u)\n",
-			      useraddr, len);
+		KGSL_CORE_ERR("user address len(%u)"
+			      "and start(%p) must be page"
+			      "aligned\n", len, hostptr);
 		return -EINVAL;
 	}
 
@@ -1641,29 +1445,28 @@
 
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = size;
-	entry->memdesc.useraddr = useraddr + (offset & PAGE_MASK);
-	if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
-		entry->memdesc.gpuaddr = entry->memdesc.useraddr;
+	entry->memdesc.hostptr = hostptr + (offset & PAGE_MASK);
 
-	return memdesc_sg_virt(&entry->memdesc, entry->memdesc.useraddr,
-				size);
+	return memdesc_sg_virt(&entry->memdesc,
+		hostptr + (offset & PAGE_MASK), size);
 }
 
 #ifdef CONFIG_ASHMEM
 static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry,
 			     struct kgsl_pagetable *pagetable,
-			     int fd, unsigned long useraddr, size_t size)
+			     int fd, void *hostptr, size_t size)
 {
 	int ret;
 	struct vm_area_struct *vma;
 	struct file *filep, *vmfile;
 	unsigned long len;
+	unsigned int hostaddr = (unsigned int) hostptr;
 
-	vma = kgsl_get_vma_from_start_addr(useraddr);
+	vma = kgsl_get_vma_from_start_addr(hostaddr);
 	if (vma == NULL)
 		return -EINVAL;
 
-	if (vma->vm_pgoff || vma->vm_start != useraddr) {
+	if (vma->vm_pgoff || vma->vm_start != hostaddr) {
 		KGSL_CORE_ERR("Invalid vma region\n");
 		return -EINVAL;
 	}
@@ -1674,8 +1477,8 @@
 		size = len;
 
 	if (size != len) {
-		KGSL_CORE_ERR("Invalid size %d for vma region %lx\n",
-			      size, useraddr);
+		KGSL_CORE_ERR("Invalid size %d for vma region %p\n",
+			      size, hostptr);
 		return -EINVAL;
 	}
 
@@ -1695,11 +1498,9 @@
 	entry->priv_data = filep;
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = ALIGN(size, PAGE_SIZE);
-	entry->memdesc.useraddr = useraddr;
-	if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
-		entry->memdesc.gpuaddr = entry->memdesc.useraddr;
+	entry->memdesc.hostptr = hostptr;
 
-	ret = memdesc_sg_virt(&entry->memdesc, useraddr, size);
+	ret = memdesc_sg_virt(&entry->memdesc, hostptr, size);
 	if (ret)
 		goto err;
 
@@ -1712,23 +1513,18 @@
 #else
 static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry,
 			     struct kgsl_pagetable *pagetable,
-			     int fd, unsigned long useraddr, size_t size)
+			     int fd, void *hostptr, size_t size)
 {
 	return -EINVAL;
 }
 #endif
 
 static int kgsl_setup_ion(struct kgsl_mem_entry *entry,
-		struct kgsl_pagetable *pagetable, void *data)
+		struct kgsl_pagetable *pagetable, int fd)
 {
 	struct ion_handle *handle;
 	struct scatterlist *s;
 	struct sg_table *sg_table;
-	struct kgsl_map_user_mem *param = data;
-	int fd = param->fd;
-
-	if (!param->len)
-		return -EINVAL;
 
 	if (IS_ERR_OR_NULL(kgsl_ion_client))
 		return -ENODEV;
@@ -1743,8 +1539,6 @@
 	entry->priv_data = handle;
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = 0;
-	/* USE_CPU_MAP is not implemented for ION. */
-	entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	sg_table = ion_sg_table(kgsl_ion_client, handle);
 
@@ -1762,8 +1556,6 @@
 		entry->memdesc.sglen++;
 	}
 
-	entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size);
-
 	return 0;
 err:
 	ion_free(kgsl_ion_client, handle);
@@ -1789,20 +1581,7 @@
 	else
 		memtype = param->memtype;
 
-	/*
-	 * Mask off unknown flags from userspace. This way the caller can
-	 * check if a flag is supported by looking at the returned flags.
-	 * Note: CACHEMODE is ignored for this call. Caching should be
-	 * determined by type of allocation being mapped.
-	 */
-	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
-			| KGSL_MEMTYPE_MASK
-			| KGSL_MEMALIGN_MASK
-			| KGSL_MEMFLAGS_USE_CPU_MAP;
-
 	entry->memdesc.flags = param->flags;
-	if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
-		entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	switch (memtype) {
 	case KGSL_USER_MEM_TYPE_PMEM:
@@ -1828,8 +1607,8 @@
 		if (param->hostptr == 0)
 			break;
 
-		result = kgsl_setup_useraddr(entry, private->pagetable,
-					    param->hostptr,
+		result = kgsl_setup_hostptr(entry, private->pagetable,
+					    (void *) param->hostptr,
 					    param->offset, param->len);
 		entry->memtype = KGSL_MEM_ENTRY_USER;
 		break;
@@ -1846,13 +1625,14 @@
 			break;
 
 		result = kgsl_setup_ashmem(entry, private->pagetable,
-					   param->fd, param->hostptr,
+					   param->fd, (void *) param->hostptr,
 					   param->len);
 
 		entry->memtype = KGSL_MEM_ENTRY_ASHMEM;
 		break;
 	case KGSL_USER_MEM_TYPE_ION:
-		result = kgsl_setup_ion(entry, private->pagetable, data);
+		result = kgsl_setup_ion(entry, private->pagetable,
+			param->fd);
 		break;
 	default:
 		KGSL_CORE_ERR("Invalid memory type: %x\n", memtype);
@@ -1867,31 +1647,27 @@
 	else if (entry->memdesc.size >= SZ_64K)
 		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K));
 
-	result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
+	result = kgsl_mmu_map(private->pagetable,
+			      &entry->memdesc,
+			      GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+
 	if (result)
 		goto error_put_file_ptr;
 
 	/* Adjust the returned value for a non 4k aligned offset */
 	param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK);
-	/* echo back flags */
-	param->flags = entry->memdesc.flags;
-
-	result = kgsl_mem_entry_attach_process(entry, private);
-	if (result)
-		goto error_unmap;
 
 	KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped,
 		kgsl_driver.stats.mapped_max);
 
 	kgsl_process_add_stats(private, entry->memtype, param->len);
 
+	kgsl_mem_entry_attach_process(entry, private);
 	trace_kgsl_mem_map(entry, param->fd);
 
 	kgsl_check_idle(dev_priv->device);
 	return result;
 
-error_unmap:
-	kgsl_mmu_unmap(private->pagetable, &entry->memdesc);
 error_put_file_ptr:
 	switch (entry->memtype) {
 	case KGSL_MEM_ENTRY_PMEM:
@@ -1911,136 +1687,33 @@
 	return result;
 }
 
-static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, int op)
-{
-	int ret = 0;
-	int cacheop;
-	int mode;
-
-	/*
-	 * Flush is defined as (clean | invalidate).  If both bits are set, then
-	 * do a flush, otherwise check for the individual bits and clean or inv
-	 * as requested
-	 */
-
-	if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH)
-		cacheop = KGSL_CACHE_OP_FLUSH;
-	else if (op & KGSL_GPUMEM_CACHE_CLEAN)
-		cacheop = KGSL_CACHE_OP_CLEAN;
-	else if (op & KGSL_GPUMEM_CACHE_INV)
-		cacheop = KGSL_CACHE_OP_INV;
-	else {
-		ret = -EINVAL;
-		goto done;
-	}
-
-	mode = kgsl_memdesc_get_cachemode(&entry->memdesc);
-	if (mode != KGSL_CACHEMODE_UNCACHED
-		&& mode != KGSL_CACHEMODE_WRITECOMBINE) {
-		trace_kgsl_mem_sync_cache(entry, op);
-		kgsl_cache_range_op(&entry->memdesc, cacheop);
-	}
-
-done:
-	return ret;
-}
-
-/* New cache sync function - supports both directions (clean and invalidate) */
-
-static long
-kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv,
-	unsigned int cmd, void *data)
-{
-	struct kgsl_gpumem_sync_cache *param = data;
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_mem_entry *entry = NULL;
-
-	if (param->id != 0) {
-		entry = kgsl_sharedmem_find_id(private, param->id);
-		if (entry == NULL) {
-			KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n",
-					param->id);
-			return -EINVAL;
-		}
-	} else if (param->gpuaddr != 0) {
-		spin_lock(&private->mem_lock);
-		entry = kgsl_sharedmem_find(private, param->gpuaddr);
-		spin_unlock(&private->mem_lock);
-		if (entry == NULL) {
-			KGSL_MEM_INFO(dev_priv->device,
-					"can't find gpuaddr %x\n",
-					param->gpuaddr);
-			return -EINVAL;
-		}
-	} else {
-		return -EINVAL;
-	}
-
-	return _kgsl_gpumem_sync_cache(entry, param->op);
-}
-
-/* Legacy cache function, does a flush (clean  + invalidate) */
-
+/* This function flushes a graphics memory allocation from the CPU cache
+ * when caching is enabled with the MMU. */
 static long
 kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
 				 unsigned int cmd, void *data)
 {
+	int result = 0;
+	struct kgsl_mem_entry *entry;
 	struct kgsl_sharedmem_free *param = data;
 	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_mem_entry *entry = NULL;
 
 	spin_lock(&private->mem_lock);
 	entry = kgsl_sharedmem_find(private, param->gpuaddr);
-	spin_unlock(&private->mem_lock);
-	if (entry == NULL) {
-		KGSL_MEM_INFO(dev_priv->device,
-				"can't find gpuaddr %x\n",
-				param->gpuaddr);
-		return -EINVAL;
+	if (!entry) {
+		KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
+		result = -EINVAL;
+		goto done;
+	}
+	if (!entry->memdesc.hostptr) {
+		KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n",
+			param->gpuaddr);
+		result = -EINVAL;
+		goto done;
 	}
 
-	return _kgsl_gpumem_sync_cache(entry, KGSL_GPUMEM_CACHE_FLUSH);
-}
-
-/*
- * The common parts of kgsl_ioctl_gpumem_alloc and kgsl_ioctl_gpumem_alloc_id.
- */
-int
-_gpumem_alloc(struct kgsl_device_private *dev_priv,
-		struct kgsl_mem_entry **ret_entry,
-		unsigned int size, unsigned int flags)
-{
-	int result;
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_mem_entry *entry;
-
-	/*
-	 * Mask off unknown flags from userspace. This way the caller can
-	 * check if a flag is supported by looking at the returned flags.
-	 */
-	flags &= KGSL_MEMFLAGS_GPUREADONLY
-		| KGSL_CACHEMODE_MASK
-		| KGSL_MEMTYPE_MASK
-		| KGSL_MEMALIGN_MASK
-		| KGSL_MEMFLAGS_USE_CPU_MAP;
-
-	entry = kgsl_mem_entry_create();
-	if (entry == NULL)
-		return -ENOMEM;
-
-	result = kgsl_allocate_user(&entry->memdesc, private->pagetable, size,
-				    flags);
-	if (result != 0)
-		goto err;
-
-	entry->memtype = KGSL_MEM_ENTRY_KERNEL;
-
-	kgsl_check_idle(dev_priv->device);
-	*ret_entry = entry;
-	return result;
-err:
-	kfree(entry);
-	*ret_entry = NULL;
+	kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN);
+done:
+	spin_unlock(&private->mem_lock);
 	return result;
 }
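
The removed _kgsl_gpumem_sync_cache() treats flush as the composite of clean and invalidate, so its decode tests the combined mask before the individual bits. A standalone sketch of that decode; the bit values here are assumptions for illustration, not the real KGSL_GPUMEM_CACHE_* encoding.

/*
 * Standalone sketch of the cache-op flag decode: FLUSH == CLEAN | INV,
 * so the composite must be tested first or FLUSH would decode as CLEAN.
 */
#include <stdio.h>

#define CACHE_CLEAN  (1 << 0)	/* assumed */
#define CACHE_INV    (1 << 1)	/* assumed */
#define CACHE_FLUSH  (CACHE_CLEAN | CACHE_INV)

enum cache_op { OP_CLEAN, OP_INV, OP_FLUSH, OP_BAD };

static enum cache_op decode_cache_op(int flags)
{
	if ((flags & CACHE_FLUSH) == CACHE_FLUSH)
		return OP_FLUSH;
	if (flags & CACHE_CLEAN)
		return OP_CLEAN;
	if (flags & CACHE_INV)
		return OP_INV;
	return OP_BAD;
}

int main(void)
{
	printf("%d %d %d\n", decode_cache_op(CACHE_FLUSH),
	       decode_cache_op(CACHE_CLEAN), decode_cache_op(CACHE_INV));
	return 0;
}
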
 
@@ -2050,115 +1723,29 @@
 {
 	struct kgsl_process_private *private = dev_priv->process_priv;
 	struct kgsl_gpumem_alloc *param = data;
-	struct kgsl_mem_entry *entry = NULL;
+	struct kgsl_mem_entry *entry;
 	int result;
 
-	param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
-	result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
-	if (result)
-		return result;
+	entry = kgsl_mem_entry_create();
+	if (entry == NULL)
+		return -ENOMEM;
 
-	result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
-	if (result)
-		goto err;
+	result = kgsl_allocate_user(&entry->memdesc, private->pagetable,
+		param->size, param->flags);
 
-	result = kgsl_mem_entry_attach_process(entry, private);
-	if (result != 0)
-		goto err;
+	if (result == 0) {
+		entry->memtype = KGSL_MEM_ENTRY_KERNEL;
+		kgsl_mem_entry_attach_process(entry, private);
+		param->gpuaddr = entry->memdesc.gpuaddr;
 
-	kgsl_process_add_stats(private, entry->memtype, param->size);
-	trace_kgsl_mem_alloc(entry);
+		kgsl_process_add_stats(private, entry->memtype, param->size);
+		trace_kgsl_mem_alloc(entry);
+	} else {
+		kfree(entry);
+	}
 
-	param->gpuaddr = entry->memdesc.gpuaddr;
-	param->size = entry->memdesc.size;
-	param->flags = entry->memdesc.flags;
-	return result;
-err:
-	kgsl_sharedmem_free(&entry->memdesc);
-	kfree(entry);
+	kgsl_check_idle(dev_priv->device);
 	return result;
 }
-
-static long
-kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv,
-			unsigned int cmd, void *data)
-{
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_gpumem_alloc_id *param = data;
-	struct kgsl_mem_entry *entry = NULL;
-	int result;
-
-	if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
-		param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
-
-	result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
-	if (result != 0)
-		goto err;
-
-	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
-		result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
-		if (result)
-			goto err;
-	}
-
-	result = kgsl_mem_entry_attach_process(entry, private);
-	if (result != 0)
-		goto err;
-
-	kgsl_process_add_stats(private, entry->memtype, param->size);
-	trace_kgsl_mem_alloc(entry);
-
-	param->id = entry->id;
-	param->flags = entry->memdesc.flags;
-	param->size = entry->memdesc.size;
-	param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
-	param->gpuaddr = entry->memdesc.gpuaddr;
-	return result;
-err:
-	if (entry)
-		kgsl_sharedmem_free(&entry->memdesc);
-	kfree(entry);
-	return result;
-}
-
-static long
-kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv,
-			unsigned int cmd, void *data)
-{
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_gpumem_get_info *param = data;
-	struct kgsl_mem_entry *entry = NULL;
-	int result = 0;
-
-	if (param->id != 0) {
-		entry = kgsl_sharedmem_find_id(private, param->id);
-		if (entry == NULL) {
-			KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n",
-					param->id);
-			return -EINVAL;
-		}
-	} else if (param->gpuaddr != 0) {
-		spin_lock(&private->mem_lock);
-		entry = kgsl_sharedmem_find(private, param->gpuaddr);
-		spin_unlock(&private->mem_lock);
-		if (entry == NULL) {
-			KGSL_MEM_INFO(dev_priv->device,
-					"can't find gpuaddr %lx\n",
-					param->gpuaddr);
-			return -EINVAL;
-		}
-	} else {
-		return -EINVAL;
-	}
-	param->gpuaddr = entry->memdesc.gpuaddr;
-	param->id = entry->id;
-	param->flags = entry->memdesc.flags;
-	param->size = entry->memdesc.size;
-	param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
-	param->useraddr = entry->memdesc.useraddr;
-	return result;
-}
-
 static long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv,
 					unsigned int cmd, void *data)
 {
@@ -2322,7 +1909,7 @@
 static const struct {
 	unsigned int cmd;
 	kgsl_ioctl_func_t func;
-	unsigned int flags;
+	int flags;
 } kgsl_ioctl_funcs[] = {
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
 			kgsl_ioctl_device_getproperty,
@@ -2373,15 +1960,7 @@
 			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY,
 			kgsl_ioctl_device_setproperty,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
-	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID,
-			kgsl_ioctl_gpumem_alloc_id, 0),
-	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID,
-			kgsl_ioctl_gpumem_free_id, 0),
-	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO,
-			kgsl_ioctl_gpumem_get_info, 0),
-	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE,
-			kgsl_ioctl_gpumem_sync_cache, 0),
+			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE)
 };
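
The table above maps ioctl commands to handlers plus locking flags; kgsl_ioctl() below indexes it by the ioctl sequence number and verifies the full command word before dispatching. A standalone sketch of that pattern; the command encoding is a toy stand-in for the kernel's _IOC_* macros.

/*
 * Standalone sketch of a flag-annotated ioctl dispatch table: index by
 * sequence number, verify the full command, then call the handler.
 */
#include <stdio.h>

#define DEMO_NR(cmd)	((cmd) & 0xFF)
#define DEMO_CMD(nr)	(0xAE00u | (nr))	/* toy encoding */

typedef long (*ioctl_func_t)(void *data);

static long demo_getprop(void *data) { (void)data; return 0; }
static long demo_setprop(void *data) { (void)data; return 0; }

static const struct {
	unsigned int cmd;
	ioctl_func_t func;
	unsigned int flags;	/* e.g. "take the device mutex" */
} ioctl_funcs[] = {
	{ DEMO_CMD(0), demo_getprop, 1 },
	{ DEMO_CMD(1), demo_setprop, 1 },
};

static long demo_dispatch(unsigned int cmd, void *data)
{
	unsigned int nr = DEMO_NR(cmd);

	if (nr >= sizeof(ioctl_funcs) / sizeof(ioctl_funcs[0]) ||
	    ioctl_funcs[nr].cmd != cmd)
		return -1;	/* -ENOIOCTLCMD in the real driver */

	return ioctl_funcs[nr].func(data);
}

int main(void)
{
	printf("%ld\n", demo_dispatch(DEMO_CMD(1), NULL));
	return 0;
}
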
 
 static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -2473,11 +2052,7 @@
 		mutex_unlock(&dev_priv->device->mutex);
 	}
 
-	/*
-	 * Still copy back on failure, but assume function took
-	 * all necessary precautions sanitizing the return values.
-	 */
-	if (cmd & IOC_OUT) {
+	if (ret == 0 && (cmd & IOC_OUT)) {
 		if (copy_to_user((void __user *) arg, uptr, _IOC_SIZE(cmd)))
 			ret = -EFAULT;
 	}
@@ -2545,8 +2120,6 @@
 kgsl_gpumem_vm_close(struct vm_area_struct *vma)
 {
 	struct kgsl_mem_entry *entry  = vma->vm_private_data;
-
-	entry->memdesc.useraddr = 0;
 	kgsl_mem_entry_put(entry);
 }
 
@@ -2556,145 +2129,8 @@
 	.close = kgsl_gpumem_vm_close,
 };
 
-static int
-get_mmap_entry(struct kgsl_process_private *private,
-		struct kgsl_mem_entry **out_entry, unsigned long pgoff,
-		unsigned long len)
-{
-	int ret = -EINVAL;
-	struct kgsl_mem_entry *entry;
-
-	entry = kgsl_sharedmem_find_id(private, pgoff);
-	if (entry == NULL) {
-		spin_lock(&private->mem_lock);
-		entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT);
-		spin_unlock(&private->mem_lock);
-	}
-
-	if (!entry)
-		return -EINVAL;
-
-	kgsl_mem_entry_get(entry);
-
-	if (!entry->memdesc.ops ||
-		!entry->memdesc.ops->vmflags ||
-		!entry->memdesc.ops->vmfault) {
-		ret = -EINVAL;
-		goto err_put;
-	}
-
-	if (entry->memdesc.useraddr != 0) {
-		ret = -EBUSY;
-		goto err_put;
-	}
-
-	if (len != kgsl_memdesc_mmapsize(&entry->memdesc)) {
-		ret = -ERANGE;
-		goto err_put;
-	}
-
-	*out_entry = entry;
-	return 0;
-err_put:
-	kgsl_mem_entry_put(entry);
-	return ret;
-}
-
-static unsigned long
-kgsl_get_unmapped_area(struct file *file, unsigned long addr,
-			unsigned long len, unsigned long pgoff,
-			unsigned long flags)
-{
-	unsigned long ret = 0;
-	unsigned long vma_offset = pgoff << PAGE_SHIFT;
-	struct kgsl_device_private *dev_priv = file->private_data;
-	struct kgsl_process_private *private = dev_priv->process_priv;
-	struct kgsl_device *device = dev_priv->device;
-	struct kgsl_mem_entry *entry = NULL;
-	unsigned int align;
-	unsigned int retry = 0;
-
-	if (vma_offset == device->memstore.gpuaddr)
-		return get_unmapped_area(NULL, addr, len, pgoff, flags);
-
-	ret = get_mmap_entry(private, &entry, pgoff, len);
-	if (ret)
-		return ret;
-
-	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc) || (flags & MAP_FIXED)) {
-		/*
-		 * If we're not going to use the same mapping on the gpu,
-		 * any address is fine.
-		 * For MAP_FIXED, hopefully the caller knows what they're doing,
-		 * but we may fail in mmap() if there is already something
-		 * at the virtual address chosen.
-		 */
-		ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
-		goto put;
-	}
-	if (entry->memdesc.gpuaddr != 0) {
-		KGSL_MEM_INFO(device,
-				"pgoff %lx already mapped to gpuaddr %x\n",
-				pgoff, entry->memdesc.gpuaddr);
-		ret = -EBUSY;
-		goto put;
-	}
-
-	align = kgsl_memdesc_get_align(&entry->memdesc);
-	if (align >= ilog2(SZ_1M))
-		align = ilog2(SZ_1M);
-	else if (align >= ilog2(SZ_64K))
-		align = ilog2(SZ_64K);
-	else if (align <= PAGE_SHIFT)
-		align = 0;
-
-	if (align)
-		len += 1 << align;
-	do {
-		ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
-		if (IS_ERR_VALUE(ret))
-			break;
-		if (align)
-			ret = ALIGN(ret, (1 << align));
-
-		/*make sure there isn't a GPU only mapping at this address */
-		if (kgsl_sharedmem_region_empty(private, ret, len))
-			break;
-
-		trace_kgsl_mem_unmapped_area_collision(entry, addr, len, ret);
-
-		/*
-		 * If we collided, bump the hint address so that
- * get_unmapped_area knows to look somewhere else.
-		 */
-		addr = (addr == 0) ? ret + len : addr + len;
-
-		/*
-		 * The addr hint can be set by userspace to be near
-		 * the end of the address space. Make sure we search
-		 * the whole address space at least once by wrapping
-		 * back around once.
-		 */
-		if (!retry && (addr + len >= TASK_SIZE)) {
-			addr = 0;
-			retry = 1;
-		} else {
-			ret = -EBUSY;
-		}
-	} while (addr + len < TASK_SIZE);
-
-	if (IS_ERR_VALUE(ret))
-		KGSL_MEM_INFO(device,
-				"pid %d pgoff %lx len %ld failed error %ld\n",
-				private->pid, pgoff, len, ret);
-put:
-	kgsl_mem_entry_put(entry);
-	return ret;
-}
-
 static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
 {
-	unsigned int ret, cache;
 	unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT;
 	struct kgsl_device_private *dev_priv = file->private_data;
 	struct kgsl_process_private *private = dev_priv->process_priv;
@@ -2706,76 +2142,31 @@
 	if (vma_offset == device->memstore.gpuaddr)
 		return kgsl_mmap_memstore(device, vma);
 
-	ret = get_mmap_entry(private, &entry, vma->vm_pgoff,
-				vma->vm_end - vma->vm_start);
-	if (ret)
-		return ret;
+	/* Find a chunk of GPU memory */
 
-	if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
-		entry->memdesc.gpuaddr = vma->vm_start;
+	spin_lock(&private->mem_lock);
+	entry = kgsl_sharedmem_find(private, vma_offset);
 
-		ret = kgsl_mmu_map(private->pagetable, &entry->memdesc);
-		if (ret) {
-			kgsl_mem_entry_put(entry);
-			return ret;
-		}
-		kgsl_mem_entry_track_gpuaddr(private, entry);
-	}
+	if (entry)
+		kgsl_mem_entry_get(entry);
+
+	spin_unlock(&private->mem_lock);
+
+	if (entry == NULL)
+		return -EINVAL;
+
+	if (!entry->memdesc.ops ||
+		!entry->memdesc.ops->vmflags ||
+		!entry->memdesc.ops->vmfault)
+		return -EINVAL;
 
 	vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc);
 
 	vma->vm_private_data = entry;
-
-	/* Determine user-side caching policy */
-
-	cache = kgsl_memdesc_get_cachemode(&entry->memdesc);
-
-	switch (cache) {
-	case KGSL_CACHEMODE_UNCACHED:
-		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
-		break;
-	case KGSL_CACHEMODE_WRITETHROUGH:
-		vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot);
-		break;
-	case KGSL_CACHEMODE_WRITEBACK:
-		vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot);
-		break;
-	case KGSL_CACHEMODE_WRITECOMBINE:
-	default:
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-		break;
-	}
-
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
 	vma->vm_ops = &kgsl_gpumem_vm_ops;
-
-	if (cache == KGSL_CACHEMODE_WRITEBACK
-		|| cache == KGSL_CACHEMODE_WRITETHROUGH) {
-		struct scatterlist *s;
-		int i;
-		int sglen = entry->memdesc.sglen;
-		unsigned long addr = vma->vm_start;
-
-		/* don't map in the guard page, it should always fault */
-		if (kgsl_memdesc_has_guard_page(&entry->memdesc))
-			sglen--;
-
-		for_each_sg(entry->memdesc.sg, s, sglen, i) {
-			int j;
-			for (j = 0; j < (sg_dma_len(s) >> PAGE_SHIFT); j++) {
-				struct page *page = sg_page(s);
-				page = nth_page(page, j);
-				vm_insert_page(vma, addr, page);
-				addr += PAGE_SIZE;
-			}
-		}
-	}
-
 	vma->vm_file = file;
 
-	entry->memdesc.useraddr = vma->vm_start;
-
-	trace_kgsl_mem_mmap(entry);
-
 	return 0;
 }
 
@@ -2792,7 +2183,6 @@
 	.release = kgsl_release,
 	.open = kgsl_open,
 	.mmap = kgsl_mmap,
-	.get_unmapped_area = kgsl_get_unmapped_area,
 	.unlocked_ioctl = kgsl_ioctl,
 };
 
@@ -2800,8 +2190,6 @@
 	.process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex),
 	.ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock),
 	.devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock),
-	.memfree_hist_mutex =
-		__MUTEX_INITIALIZER(kgsl_driver.memfree_hist_mutex),
 };
 EXPORT_SYMBOL(kgsl_driver);
 
@@ -2884,7 +2272,6 @@
 
 	kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME);
 
-	/* Get starting physical address of device registers */
 	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
 					   device->iomemname);
 	if (res == NULL) {
@@ -2902,33 +2289,6 @@
 	device->reg_phys = res->start;
 	device->reg_len = resource_size(res);
 
-	/*
-	 * Check if a shadermemname is defined, and then get shader memory
-	 * details including shader memory starting physical address
-	 * and shader memory length
-	 */
-	if (device->shadermemname != NULL) {
-		res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-						device->shadermemname);
-
-		if (res == NULL) {
-			KGSL_DRV_ERR(device,
-			"Shader memory: platform_get_resource_byname failed\n");
-		}
-
-		else {
-			device->shader_mem_phys = res->start;
-			device->shader_mem_len = resource_size(res);
-		}
-
-		if (!devm_request_mem_region(device->dev,
-					device->shader_mem_phys,
-					device->shader_mem_len,
-						device->name)) {
-			KGSL_DRV_ERR(device, "request_mem_region_failed\n");
-		}
-	}
-
 	if (!devm_request_mem_region(device->dev, device->reg_phys,
 				device->reg_len, device->name)) {
 		KGSL_DRV_ERR(device, "request_mem_region failed\n");
@@ -2974,6 +2334,7 @@
 	if (result)
 		goto error_pwrctrl_close;
 
+	kgsl_cffdump_open(device->id);
 
 	setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
 	status = kgsl_create_device_workqueue(device);
@@ -2995,8 +2356,7 @@
 		goto error_close_mmu;
 	}
 
-	pm_qos_add_request(&device->pwrctrl.pm_qos_req_dma,
-				PM_QOS_CPU_DMA_LATENCY,
+	pm_qos_add_request(&device->pm_qos_req_dma, PM_QOS_CPU_DMA_LATENCY,
 				PM_QOS_DEFAULT_VALUE);
 
 	/* Initalize the snapshot engine */
@@ -3046,15 +2406,11 @@
 	if (device->pm_dump_enable) {
 
 		KGSL_LOG_DUMP(device,
-			"POWER: NAP ALLOWED = %d | START_STOP_SLEEP_WAKE = %d\n"
-			, pwr->nap_allowed, pwr->strtstp_sleepwake);
-
-		KGSL_LOG_DUMP(device,
-			"POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
-			pwr->power_flags, pwr->active_pwrlevel);
+				"POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
+				pwr->power_flags, pwr->active_pwrlevel);
 
 		KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
-			pwr->interval_timeout);
+				pwr->interval_timeout);
 
 	}
 
@@ -3102,9 +2458,10 @@
 {
 	kgsl_device_snapshot_close(device);
 
+	kgsl_cffdump_close(device->id);
 	kgsl_pwrctrl_uninit_sysfs(device);
 
-	pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
+	pm_qos_remove_request(&device->pm_qos_req_dma);
 
 	idr_destroy(&device->context_idr);
 
@@ -3157,7 +2514,6 @@
 		kgsl_driver.class = NULL;
 	}
 
-	kgsl_memfree_hist_exit();
 	unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX);
 }
 
@@ -3229,9 +2585,6 @@
 			goto err;
 	}
 
-	if (kgsl_memfree_hist_init())
-		KGSL_CORE_ERR("failed to init memfree_hist");
-
 	return 0;
 
 err:
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index c568db5..3935164 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -71,23 +71,6 @@
 #define KGSL_STATS_ADD(_size, _stat, _max) \
 	do { _stat += (_size); if (_stat > _max) _max = _stat; } while (0)
 
-
-#define KGSL_MEMFREE_HIST_SIZE	((int)(PAGE_SIZE * 2))
-
-struct kgsl_memfree_hist_elem {
-	unsigned int pid;
-	unsigned int gpuaddr;
-	unsigned int size;
-	unsigned int flags;
-};
-
-struct kgsl_memfree_hist {
-	void *base_hist_rb;
-	unsigned int size;
-	struct kgsl_memfree_hist_elem *wptr;
-};
-
-
 struct kgsl_device;
 struct kgsl_context;
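
The kgsl_memfree_hist structures removed above back a fixed-size ring of recent frees with a wrapping write pointer, guarded by a mutex. A userspace analogue of the pattern (a pthread mutex stands in for the kernel mutex; the element count is illustrative):

/*
 * Userspace sketch of the memfree history ring: record into a fixed
 * buffer and wrap the write pointer when it reaches the end.
 */
#include <pthread.h>

struct free_hist_elem {
	unsigned int pid;
	unsigned int gpuaddr;
	unsigned int size;
	unsigned int flags;
};

#define HIST_COUNT 256

static struct free_hist_elem hist[HIST_COUNT];
static struct free_hist_elem *wptr = hist;
static pthread_mutex_t hist_lock = PTHREAD_MUTEX_INITIALIZER;

static void hist_record(unsigned int pid, unsigned int gpuaddr,
			unsigned int size, unsigned int flags)
{
	pthread_mutex_lock(&hist_lock);
	wptr->pid = pid;
	wptr->gpuaddr = gpuaddr;
	wptr->size = size;
	wptr->flags = flags;

	/* Wrap back to the start once the end of the buffer is reached */
	if (++wptr >= hist + HIST_COUNT)
		wptr = hist;
	pthread_mutex_unlock(&hist_lock);
}

int main(void)
{
	hist_record(42, 0x40001000, 4096, 0);
	return 0;
}
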
 
@@ -116,9 +99,6 @@
 
 	void *ptpool;
 
-	struct mutex memfree_hist_mutex;
-	struct kgsl_memfree_hist memfree_hist;
-
 	struct {
 		unsigned int vmalloc;
 		unsigned int vmalloc_max;
@@ -149,16 +129,13 @@
 #define KGSL_MEMDESC_GUARD_PAGE BIT(0)
 /* Set if the memdesc is mapped into all pagetables */
 #define KGSL_MEMDESC_GLOBAL BIT(1)
-/* The memdesc is frozen during a snapshot */
-#define KGSL_MEMDESC_FROZEN BIT(2)
 
 /* shared memory allocation */
 struct kgsl_memdesc {
 	struct kgsl_pagetable *pagetable;
-	void *hostptr; /* kernel virtual address */
-	unsigned long useraddr; /* userspace address */
+	void *hostptr;
 	unsigned int gpuaddr;
-	phys_addr_t physaddr;
+	unsigned int physaddr;
 	unsigned int size;
 	unsigned int priv; /* Internal flags and settings */
 	struct scatterlist *sg;
@@ -177,13 +154,17 @@
 #define KGSL_MEM_ENTRY_ION    4
 #define KGSL_MEM_ENTRY_MAX    5
 
+/* List of flags */
+
+#define KGSL_MEM_ENTRY_FROZEN (1 << 0)
+
 struct kgsl_mem_entry {
 	struct kref refcount;
 	struct kgsl_memdesc memdesc;
 	int memtype;
+	int flags;
 	void *priv_data;
 	struct rb_node node;
-	unsigned int id;
 	unsigned int context_id;
 	/* back pointer to private structure under whose context this
 	* allocation is made */
@@ -243,10 +224,6 @@
 static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
 				unsigned int gpuaddr, unsigned int size)
 {
-	/* don't overflow */
-	if ((gpuaddr + size) < gpuaddr)
-		return 0;
-
 	if (gpuaddr >= memdesc->gpuaddr &&
 	    ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) {
 		return 1;
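
Dropping the wrap check above reintroduces a subtle hole: with a size near UINT_MAX, gpuaddr + size overflows and the naive containment test can pass for a plainly bogus range. A standalone demonstration of why the guard mattered:

/*
 * With the guard in place this prints 0; delete the first check to see
 * the wrapped range slip through: 0x1000 + 0xFFFFFFFF wraps to 0xFFF,
 * which looks "inside" to the naive comparison.
 */
#include <stdio.h>

static int in_range(unsigned int base, unsigned int len,
		    unsigned int gpuaddr, unsigned int size)
{
	/* The guard this hunk removes: reject wrapped ranges outright */
	if (gpuaddr + size < gpuaddr)
		return 0;

	return gpuaddr >= base && gpuaddr + size <= base + len;
}

int main(void)
{
	printf("%d\n", in_range(0x1000, 0x2000, 0x1000, 0xFFFFFFFFu));
	return 0;
}
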
diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c
index 6dc2ccc..e06c94d 100644
--- a/drivers/gpu/msm/kgsl_cffdump.c
+++ b/drivers/gpu/msm/kgsl_cffdump.c
@@ -28,7 +28,6 @@
 #include "kgsl_log.h"
 #include "kgsl_sharedmem.h"
 #include "adreno_pm4types.h"
-#include "adreno.h"
 
 static struct rchan	*chan;
 static struct dentry	*dir;
@@ -335,7 +334,7 @@
 		return;
 	}
 
-	kgsl_cff_dump_enable = 0;
+	kgsl_cff_dump_enable = 1;
 
 	spin_lock_init(&cffdump_lock);
 
@@ -357,21 +356,10 @@
 		debugfs_remove(dir);
 }
 
-void kgsl_cffdump_open(struct kgsl_device *device)
+void kgsl_cffdump_open(enum kgsl_deviceid device_id)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-		kgsl_cffdump_memory_base(device->id,
-			kgsl_mmu_get_base_addr(&device->mmu),
-			kgsl_mmu_get_ptsize(&device->mmu) +
-			KGSL_IOMMU_GLOBAL_MEM_SIZE, adreno_dev->gmem_size);
-	} else {
-		kgsl_cffdump_memory_base(device->id,
-			kgsl_mmu_get_base_addr(&device->mmu),
-			kgsl_mmu_get_ptsize(&device->mmu),
-			adreno_dev->gmem_size);
-	}
+	kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE,
+			kgsl_mmu_get_ptsize(), SZ_256K);
 }
 
 void kgsl_cffdump_memory_base(enum kgsl_deviceid device_id, unsigned int base,
@@ -399,7 +387,7 @@
 }
 
 void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv,
-	struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes,
+	const struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes,
 	bool clean_cache)
 {
 	const void *src;
@@ -534,7 +522,7 @@
 }
 
 static struct dentry *create_buf_file_handler(const char *filename,
-	struct dentry *parent, unsigned short mode, struct rchan_buf *buf,
+	struct dentry *parent, int mode, struct rchan_buf *buf,
 	int *is_global)
 {
 	return debugfs_create_file(filename, mode, parent, buf,
diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h
index d5656f8..2733cc3 100644
--- a/drivers/gpu/msm/kgsl_cffdump.h
+++ b/drivers/gpu/msm/kgsl_cffdump.h
@@ -22,10 +22,10 @@
 
 void kgsl_cffdump_init(void);
 void kgsl_cffdump_destroy(void);
-void kgsl_cffdump_open(struct kgsl_device *device);
+void kgsl_cffdump_open(enum kgsl_deviceid device_id);
 void kgsl_cffdump_close(enum kgsl_deviceid device_id);
 void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv,
-	struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes,
+	const struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes,
 	bool clean_cache);
 void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes);
 void kgsl_cffdump_regwrite(enum kgsl_deviceid device_id, uint addr,
@@ -49,7 +49,7 @@
 
 #define kgsl_cffdump_init()					(void)0
 #define kgsl_cffdump_destroy()					(void)0
-#define kgsl_cffdump_open(device)				(void)0
+#define kgsl_cffdump_open(device_id)				(void)0
 #define kgsl_cffdump_close(device_id)				(void)0
 #define kgsl_cffdump_syncmem(dev_priv, memdesc, addr, sizebytes, clean_cache) \
 	(void) 0
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index 9dfda32..b41bd6b 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -125,52 +125,6 @@
 KGSL_DEBUGFS_LOG(pwr_log);
 KGSL_DEBUGFS_LOG(ft_log);
 
-static int memfree_hist_print(struct seq_file *s, void *unused)
-{
-	void *base = kgsl_driver.memfree_hist.base_hist_rb;
-
-	struct kgsl_memfree_hist_elem *wptr = kgsl_driver.memfree_hist.wptr;
-	struct kgsl_memfree_hist_elem *p;
-	char str[16];
-
-	seq_printf(s, "%8s %8s %8s %11s\n",
-			"pid", "gpuaddr", "size", "flags");
-
-	mutex_lock(&kgsl_driver.memfree_hist_mutex);
-	p = wptr;
-	for (;;) {
-		kgsl_get_memory_usage(str, sizeof(str), p->flags);
-		/*
-		 * if the ring buffer is not filled up yet
-		 * all its empty elems have size==0
-		 * just skip them ...
-		*/
-		if (p->size)
-			seq_printf(s, "%8d %08x %8d %11s\n",
-				p->pid, p->gpuaddr, p->size, str);
-		p++;
-		if ((void *)p >= base + kgsl_driver.memfree_hist.size)
-			p = (struct kgsl_memfree_hist_elem *) base;
-
-		if (p == kgsl_driver.memfree_hist.wptr)
-			break;
-	}
-	mutex_unlock(&kgsl_driver.memfree_hist_mutex);
-	return 0;
-}
-
-static int memfree_hist_open(struct inode *inode, struct file *file)
-{
-	return single_open(file, memfree_hist_print, inode->i_private);
-}
-
-static const struct file_operations memfree_hist_fops = {
-	.open = memfree_hist_open,
-	.read = seq_read,
-	.llseek = seq_lseek,
-	.release = single_release,
-};
-
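
The memfree_history node removed here is the stock single_open()/seq_file pattern: one show() callback renders the whole file and the canned seq_* helpers implement read and lseek. A kernel-style sketch of the shape (in-kernel only; names are illustrative):

/*
 * Kernel-style sketch of a read-only debugfs node built on seq_file.
 */
#include <linux/debugfs.h>
#include <linux/seq_file.h>

static int demo_hist_print(struct seq_file *s, void *unused)
{
	seq_printf(s, "%8s %8s\n", "gpuaddr", "size");
	/* ... walk the ring buffer under its mutex and print each row ... */
	return 0;
}

static int demo_hist_open(struct inode *inode, struct file *file)
{
	return single_open(file, demo_hist_print, inode->i_private);
}

static const struct file_operations demo_hist_fops = {
	.open = demo_hist_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};

/* Registered once at init, e.g.:
 * debugfs_create_file("memfree_history", 0444, parent, device,
 *		       &demo_hist_fops);
 */
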
 void kgsl_device_debugfs_init(struct kgsl_device *device)
 {
 	if (kgsl_debugfs_dir && !IS_ERR(kgsl_debugfs_dir))
@@ -197,8 +151,6 @@
 				&mem_log_fops);
 	debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device,
 				&pwr_log_fops);
-	debugfs_create_file("memfree_history", 0444, device->d_debugfs, device,
-				&memfree_hist_fops);
 	debugfs_create_file("log_level_ft", 0644, device->d_debugfs, device,
 				&ft_log_fops);
 
@@ -246,71 +198,35 @@
 	return '-';
 }
 
-static char get_cacheflag(const struct kgsl_memdesc *m)
-{
-	static const char table[] = {
-		[KGSL_CACHEMODE_WRITECOMBINE] = '-',
-		[KGSL_CACHEMODE_UNCACHED] = 'u',
-		[KGSL_CACHEMODE_WRITEBACK] = 'b',
-		[KGSL_CACHEMODE_WRITETHROUGH] = 't',
-	};
-	return table[kgsl_memdesc_get_cachemode(m)];
-}
-
-static void print_mem_entry(struct seq_file *s, struct kgsl_mem_entry *entry)
-{
-	char flags[6];
-	char usage[16];
-	struct kgsl_memdesc *m = &entry->memdesc;
-
-	flags[0] = kgsl_memdesc_is_global(m) ?  'g' : '-';
-	flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
-	flags[2] = get_alignflag(m);
-	flags[3] = get_cacheflag(m);
-	flags[4] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-';
-	flags[5] = '\0';
-
-	kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
-
-	seq_printf(s, "%08x %08lx %8d %5d %5s %10s %16s %5d\n",
-			m->gpuaddr, m->useraddr, m->size, entry->id, flags,
-			memtype_str(entry->memtype), usage, m->sglen);
-}
-
 static int process_mem_print(struct seq_file *s, void *unused)
 {
 	struct kgsl_mem_entry *entry;
 	struct rb_node *node;
 	struct kgsl_process_private *private = s->private;
-	int next = 0;
+	char flags[4];
+	char usage[16];
 
-	seq_printf(s, "%8s %8s %8s %5s %5s %10s %16s %5s\n",
-		   "gpuaddr", "useraddr", "size", "id", "flags", "type",
-		   "usage", "sglen");
-
-	/* print all entries with a GPU address */
 	spin_lock(&private->mem_lock);
-
+	seq_printf(s, "%8s %8s %5s %10s %16s %5s\n",
+		   "gpuaddr", "size", "flags", "type", "usage", "sglen");
 	for (node = rb_first(&private->mem_rb); node; node = rb_next(node)) {
+		struct kgsl_memdesc *m;
+
 		entry = rb_entry(node, struct kgsl_mem_entry, node);
-		print_mem_entry(s, entry);
-	}
+		m = &entry->memdesc;
 
+		flags[0] = m->priv & KGSL_MEMDESC_GLOBAL ?  'g' : '-';
+		flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
+		flags[2] = get_alignflag(m);
+		flags[3] = '\0';
+
+		kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
+
+		seq_printf(s, "%08x %8d %5s %10s %16s %5d\n",
+			   m->gpuaddr, m->size, flags,
+			   memtype_str(entry->memtype), usage, m->sglen);
+	}
 	spin_unlock(&private->mem_lock);
-
-	/* now print all the unbound entries */
-	while (1) {
-		rcu_read_lock();
-		entry = idr_get_next(&private->mem_idr, &next);
-		rcu_read_unlock();
-
-		if (entry == NULL)
-			break;
-		if (entry->memdesc.gpuaddr == 0)
-			print_mem_entry(s, entry);
-		next++;
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 0d11660..b215d8c 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -73,8 +73,7 @@
 	int (*idle) (struct kgsl_device *device);
 	unsigned int (*isidle) (struct kgsl_device *device);
 	int (*suspend_context) (struct kgsl_device *device);
-	int (*init) (struct kgsl_device *device);
-	int (*start) (struct kgsl_device *device);
+	int (*start) (struct kgsl_device *device, unsigned int init_ram);
 	int (*stop) (struct kgsl_device *device);
 	int (*getproperty) (struct kgsl_device *device,
 		enum kgsl_property_type type, void *value,
@@ -106,7 +105,7 @@
 			uint32_t flags);
 	int (*drawctxt_create) (struct kgsl_device *device,
 		struct kgsl_pagetable *pagetable, struct kgsl_context *context,
-		uint32_t *flags);
+		uint32_t flags);
 	void (*drawctxt_destroy) (struct kgsl_device *device,
 		struct kgsl_context *context);
 	long (*ioctl) (struct kgsl_device_private *dev_priv,
@@ -146,27 +145,11 @@
 	unsigned int ver_minor;
 	uint32_t flags;
 	enum kgsl_deviceid id;
-
-	/* Starting physical address for GPU registers */
 	unsigned long reg_phys;
-
-	/* Starting Kernel virtual address for GPU registers */
 	void *reg_virt;
-
-	/* Total memory size for all GPU registers */
 	unsigned int reg_len;
-
-	/* Kernel virtual address for GPU shader memory */
-	void *shader_mem_virt;
-
-	/* Starting physical address for GPU shader memory */
-	unsigned long shader_mem_phys;
-
-	/* GPU shader memory size */
-	unsigned int shader_mem_len;
 	struct kgsl_memdesc memstore;
 	const char *iomemname;
-	const char *shadermemname;
 
 	struct kgsl_mh mh;
 	struct kgsl_mmu mmu;
@@ -177,6 +160,7 @@
 	struct kgsl_pwrctrl pwrctrl;
 	int open_count;
 
+	struct atomic_notifier_head ts_notifier_list;
 	struct mutex mutex;
 	uint32_t state;
 	uint32_t requested_state;
@@ -217,6 +201,7 @@
 	int pm_dump_enable;
 	struct kgsl_pwrscale pwrscale;
 	struct kobject pwrscale_kobj;
+	struct pm_qos_request pm_qos_req_dma;
 	struct work_struct ts_expired_ws;
 	struct list_head events;
 	struct list_head events_pending_list;
@@ -225,16 +210,16 @@
 	/* Postmortem Control switches */
 	int pm_regs_enabled;
 	int pm_ib_enabled;
-
-	int reset_counter; /* Track how many GPU core resets have occured */
 };
 
 void kgsl_process_events(struct work_struct *work);
+void kgsl_check_fences(struct work_struct *work);
 
 #define KGSL_DEVICE_COMMON_INIT(_dev) \
 	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
 	.suspend_gate = COMPLETION_INITIALIZER((_dev).suspend_gate),\
 	.ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\
+	.ts_notifier_list = ATOMIC_NOTIFIER_INIT((_dev).ts_notifier_list),\
 	.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
 			kgsl_idle_check),\
 	.ts_expired_ws  = __WORK_INITIALIZER((_dev).ts_expired_ws,\
@@ -281,7 +266,6 @@
 	pid_t pid;
 	spinlock_t mem_lock;
 	struct rb_root mem_rb;
-	struct idr mem_idr;
 	struct kgsl_pagetable *pagetable;
 	struct list_head list;
 	struct kobject kobj;
@@ -407,6 +391,12 @@
 int kgsl_check_timestamp(struct kgsl_device *device,
 		struct kgsl_context *context, unsigned int timestamp);
 
+int kgsl_register_ts_notifier(struct kgsl_device *device,
+			      struct notifier_block *nb);
+
+int kgsl_unregister_ts_notifier(struct kgsl_device *device,
+				struct notifier_block *nb);
+
 int kgsl_device_platform_probe(struct kgsl_device *device);
 
 void kgsl_device_platform_remove(struct kgsl_device *device);
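
The ts_notifier_list restored above is a plain atomic notifier chain, so clients consume it through the stock notifier API. A hedged sketch of a consumer, assuming the chain delivers the retired timestamp as the notifier action (the callback body and names are illustrative; only the two register/unregister functions come from the header above):

	#include <linux/notifier.h>

	static int example_ts_retired(struct notifier_block *nb,
				      unsigned long timestamp, void *data)
	{
		pr_debug("kgsl retired timestamp %lu\n", timestamp);
		return NOTIFY_OK;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_ts_retired,
	};

	/* kgsl_register_ts_notifier(device, &example_nb);
	 * ...
	 * kgsl_unregister_ts_notifier(device, &example_nb); */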
diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c
index 11d6ffa..2a5a5fa 100644
--- a/drivers/gpu/msm/kgsl_drm.c
+++ b/drivers/gpu/msm/kgsl_drm.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2009-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -16,8 +16,7 @@
  */
 #include "drmP.h"
 #include "drm.h"
-
-#include <linux/msm_ion.h>
+#include <linux/android_pmem.h>
 
 #include "kgsl.h"
 #include "kgsl_device.h"
@@ -28,7 +27,7 @@
 #define DRIVER_AUTHOR           "Qualcomm"
 #define DRIVER_NAME             "kgsl"
 #define DRIVER_DESC             "KGSL DRM"
-#define DRIVER_DATE             "20121107"
+#define DRIVER_DATE             "20100127"
 
 #define DRIVER_MAJOR            2
 #define DRIVER_MINOR            1
@@ -107,7 +106,6 @@
 	uint32_t type;
 	struct kgsl_memdesc memdesc;
 	struct kgsl_pagetable *pagetable;
-	struct ion_handle *ion_handle;
 	uint64_t mmap_offset;
 	int bufcount;
 	int flags;
@@ -131,18 +129,86 @@
 	struct list_head wait_list;
 };
 
-static struct ion_client *kgsl_drm_ion_client;
-
 static int kgsl_drm_inited = DRM_KGSL_NOT_INITED;
 
 /* This is a global list of all the memory currently mapped in the MMU */
 static struct list_head kgsl_mem_list;
 
+static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op)
+{
+	int cacheop = 0;
+
+	switch (op) {
+	case DRM_KGSL_GEM_CACHE_OP_TO_DEV:
+		if (type & (DRM_KGSL_GEM_CACHE_WBACK |
+			    DRM_KGSL_GEM_CACHE_WBACKWA))
+			cacheop = KGSL_CACHE_OP_CLEAN;
+
+		break;
+
+	case DRM_KGSL_GEM_CACHE_OP_FROM_DEV:
+		if (type & (DRM_KGSL_GEM_CACHE_WBACK |
+			    DRM_KGSL_GEM_CACHE_WBACKWA |
+			    DRM_KGSL_GEM_CACHE_WTHROUGH))
+			cacheop = KGSL_CACHE_OP_INV;
+	}
+
+	kgsl_cache_range_op(memdesc, cacheop);
+}
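
kgsl_gem_mem_flush() reduces to a small policy table; in summary (when no case matches, cacheop stays 0, which kgsl_cache_range_op() presumably treats as a no-op):

	/*  direction             cached types                 op
	 *  TO_DEV   (CPU->GPU):  WBACK, WBACKWA            -> CLEAN
	 *  FROM_DEV (GPU->CPU):  WBACK, WBACKWA, WTHROUGH  -> INV
	 */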
+
+/* TODO: add vsync wait */
+
+static int kgsl_drm_load(struct drm_device *dev, unsigned long flags)
+{
+	return 0;
+}
+
+static int kgsl_drm_unload(struct drm_device *dev)
+{
+	return 0;
+}
+
 struct kgsl_drm_device_priv {
 	struct kgsl_device *device[KGSL_DEVICE_MAX];
 	struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX];
 };
 
+void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+}
+
+static int kgsl_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+	return 0;
+}
+
+static int kgsl_drm_resume(struct drm_device *dev)
+{
+	return 0;
+}
+
+static void
+kgsl_gem_free_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_kgsl_gem_object *priv = obj->driver_private;
+	struct drm_map_list *list;
+
+	list = &obj->map_list;
+	drm_ht_remove_item(&mm->offset_hash, &list->hash);
+	if (list->file_offset_node) {
+		drm_mm_put_block(list->file_offset_node);
+		list->file_offset_node = NULL;
+	}
+
+	kfree(list->map);
+	list->map = NULL;
+
+	priv->mmap_offset = 0;
+}
+
 static int
 kgsl_gem_memory_allocated(struct drm_gem_object *obj)
 {
@@ -154,8 +220,6 @@
 kgsl_gem_alloc_memory(struct drm_gem_object *obj)
 {
 	struct drm_kgsl_gem_object *priv = obj->driver_private;
-	struct sg_table *sg_table;
-	struct scatterlist *s;
 	int index;
 	int result = 0;
 
@@ -173,52 +237,21 @@
 		}
 	}
 
+	/* Set the flags for the memdesc (probably 0, unless it is cached) */
+	priv->memdesc.priv = 0;
+
 	if (TYPE_IS_PMEM(priv->type)) {
 		if (priv->type == DRM_KGSL_GEM_TYPE_EBI ||
 		    priv->type & DRM_KGSL_GEM_PMEM_EBI) {
-			priv->ion_handle = ion_alloc(kgsl_drm_ion_client,
-				obj->size * priv->bufcount, PAGE_SIZE,
-				ION_HEAP(ION_SF_HEAP_ID), 0);
-			if (IS_ERR_OR_NULL(priv->ion_handle)) {
-				DRM_ERROR(
-				"Unable to allocate ION Phys memory handle\n");
-				return -ENOMEM;
-			}
-
-			priv->memdesc.pagetable = priv->pagetable;
-
-			result = ion_phys(kgsl_drm_ion_client,
-				priv->ion_handle, (ion_phys_addr_t *)
-				&priv->memdesc.physaddr, &priv->memdesc.size);
-			if (result) {
-				DRM_ERROR(
-				"Unable to get ION Physical memory address\n");
-				ion_free(kgsl_drm_ion_client,
-					priv->ion_handle);
-				priv->ion_handle = NULL;
-				return result;
-			}
-
-			result = memdesc_sg_phys(&priv->memdesc,
-				priv->memdesc.physaddr, priv->memdesc.size);
-			if (result) {
-				DRM_ERROR(
-				"Unable to get sg list\n");
-				ion_free(kgsl_drm_ion_client,
-					priv->ion_handle);
-				priv->ion_handle = NULL;
-				return result;
-			}
-
-			result = kgsl_mmu_map(priv->pagetable, &priv->memdesc);
-			if (result) {
-				DRM_ERROR(
-				"kgsl_mmu_map failed.  result = %d\n", result);
-				ion_free(kgsl_drm_ion_client,
-					priv->ion_handle);
-				priv->ion_handle = NULL;
-				return result;
-			}
+			result = kgsl_sharedmem_ebimem_user(&priv->memdesc,
+						priv->pagetable,
+						obj->size * priv->bufcount);
+			if (result) {
+				DRM_ERROR(
+				"Unable to allocate PMEM memory\n");
+				return result;
+			}
 		}
 		else
 			return -EINVAL;
@@ -229,44 +262,15 @@
 			priv->type & DRM_KGSL_GEM_CACHE_MASK)
 				list_add(&priv->list, &kgsl_mem_list);
 
-		priv->memdesc.pagetable = priv->pagetable;
+		result = kgsl_sharedmem_page_alloc_user(&priv->memdesc,
+					priv->pagetable,
+					obj->size * priv->bufcount);
 
-		priv->ion_handle = ion_alloc(kgsl_drm_ion_client,
-				obj->size * priv->bufcount, PAGE_SIZE,
-				ION_HEAP(ION_IOMMU_HEAP_ID), 0);
-		if (IS_ERR_OR_NULL(priv->ion_handle)) {
-			DRM_ERROR(
-				"Unable to allocate ION IOMMU memory handle\n");
-				return -ENOMEM;
+		if (result != 0) {
+			DRM_ERROR(
+			"Unable to allocate Vmalloc user memory\n");
+			return result;
 		}
-
-		sg_table = ion_sg_table(kgsl_drm_ion_client,
-				priv->ion_handle);
-		if (IS_ERR_OR_NULL(priv->ion_handle)) {
-			DRM_ERROR(
-			"Unable to get ION sg table\n");
-			goto memerr;
-		}
-
-		priv->memdesc.sg = sg_table->sgl;
-
-		/* Calculate the size of the memdesc from the sglist */
-
-		priv->memdesc.sglen = 0;
-
-		for (s = priv->memdesc.sg; s != NULL; s = sg_next(s)) {
-			priv->memdesc.size += s->length;
-			priv->memdesc.sglen++;
-		}
-
-		result = kgsl_mmu_map(priv->pagetable, &priv->memdesc,
-				GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-		if (result) {
-			DRM_ERROR(
-			"kgsl_mmu_map failed.  result = %d\n", result);
-			goto memerr;
-		}
-
 	} else
 		return -EINVAL;
 
@@ -278,15 +282,7 @@
 	}
 	priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED;
 
-
 	return 0;
-
-memerr:
-	ion_free(kgsl_drm_ion_client,
-		priv->ion_handle);
-	priv->ion_handle = NULL;
-	return -ENOMEM;
-
 }
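
With the ION path reverted, allocation reduces to two paths; in summary (names as used above):

	/* PMEM/EBI types -> kgsl_sharedmem_ebimem_user(): contiguous
	 *                   carveout memory
	 * KMEM types     -> kgsl_sharedmem_page_alloc_user(): paged
	 *                   kernel memory; cached variants are also
	 *                   tracked on kgsl_mem_list for cache flushes */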
 
 static void
@@ -297,19 +293,10 @@
 	if (!kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type))
 		return;
 
-	if (priv->memdesc.gpuaddr)
-		kgsl_mmu_unmap(priv->memdesc.pagetable, &priv->memdesc);
+	kgsl_gem_mem_flush(&priv->memdesc,  priv->type,
+			   DRM_KGSL_GEM_CACHE_OP_FROM_DEV);
 
-	/* ION will take care of freeing the sg table. */
-	priv->memdesc.sg = NULL;
-	priv->memdesc.sglen = 0;
-
-	if (priv->ion_handle)
-		ion_free(kgsl_drm_ion_client, priv->ion_handle);
-
-	priv->ion_handle = NULL;
-
-	memset(&priv->memdesc, 0, sizeof(priv->memdesc));
+	kgsl_sharedmem_free(&priv->memdesc);
 
 	kgsl_mmu_putpagetable(priv->pagetable);
 	priv->pagetable = NULL;
@@ -342,10 +329,66 @@
 kgsl_gem_free_object(struct drm_gem_object *obj)
 {
 	kgsl_gem_free_memory(obj);
+	kgsl_gem_free_mmap_offset(obj);
 	drm_gem_object_release(obj);
 	kfree(obj->driver_private);
 }
 
+static int
+kgsl_gem_create_mmap_offset(struct drm_gem_object *obj)
+{
+	struct drm_device *dev = obj->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_kgsl_gem_object *priv = obj->driver_private;
+	struct drm_map_list *list;
+	int msize;
+
+	list = &obj->map_list;
+	list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
+	if (list->map == NULL) {
+		DRM_ERROR("Unable to allocate drm_map_list\n");
+		return -ENOMEM;
+	}
+
+	msize = obj->size * priv->bufcount;
+
+	list->map->type = _DRM_GEM;
+	list->map->size = msize;
+	list->map->handle = obj;
+
+	/* Allocate a mmap offset */
+	list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+						    msize / PAGE_SIZE,
+						    0, 0);
+
+	if (!list->file_offset_node) {
+		DRM_ERROR("Failed to allocate offset for %d\n", obj->name);
+		kfree(list->map);
+		return -ENOMEM;
+	}
+
+	list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+						  msize / PAGE_SIZE, 0);
+
+	if (!list->file_offset_node) {
+		DRM_ERROR("Unable to create the file_offset_node\n");
+		kfree(list->map);
+		return -ENOMEM;
+	}
+
+	list->hash.key = list->file_offset_node->start;
+	if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
+		DRM_ERROR("Failed to add to map hash\n");
+		drm_mm_put_block(list->file_offset_node);
+		kfree(list->map);
+		return -ENOMEM;
+	}
+
+	priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
+	return 0;
+}
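
The returned offset is just the hash key (a page index handed out by the DRM offset manager) scaled to bytes; a worked example with illustrative numbers:

	/* If drm_mm returns a node starting at page 0x10000, then
	 * mmap_offset = 0x10000 << PAGE_SHIFT = 0x10000000 with 4K
	 * pages. Userspace passes that value as the mmap() file
	 * offset, and msm_drm_gem_mmap() below recovers the object
	 * via drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, ...). */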
+
 int
 kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
 			unsigned long *len)
@@ -392,6 +435,9 @@
 			priv->bufs[priv->active].offset;
 
 		*len = priv->memdesc.size;
+
+		kgsl_gem_mem_flush(&priv->memdesc,
+				   priv->type, DRM_KGSL_GEM_CACHE_OP_TO_DEV);
 	} else {
 		*start = 0;
 		*len = 0;
@@ -422,7 +468,10 @@
 	priv->active = 0;
 	priv->bound = 0;
 
-	priv->type = DRM_KGSL_GEM_TYPE_KMEM;
+	/* To preserve backwards compatibility, the default memory source
+	   is EBI */
+
+	priv->type = DRM_KGSL_GEM_TYPE_PMEM | DRM_KGSL_GEM_PMEM_EBI;
 
 	ret = drm_gem_handle_create(file_priv, obj, handle);
 
@@ -464,11 +513,8 @@
 	}
 
 	ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
-	if (ret) {
-		drm_gem_object_release(obj);
-		DRM_ERROR("Unable to initialize GEM object ret = %d\n", ret);
+	if (ret)
 		return ret;
-	}
 
 	create->handle = handle;
 	return 0;
@@ -541,149 +587,6 @@
 }
 
 int
-kgsl_gem_create_from_ion_ioctl(struct drm_device *dev, void *data,
-		      struct drm_file *file_priv)
-{
-	struct drm_kgsl_gem_create_from_ion *args = data;
-	struct drm_gem_object *obj;
-	struct ion_handle *ion_handle;
-	struct drm_kgsl_gem_object *priv;
-	struct sg_table *sg_table;
-	struct scatterlist *s;
-	int ret, handle;
-	unsigned long size;
-
-	ion_handle = ion_import_dma_buf(kgsl_drm_ion_client, args->ion_fd);
-	if (IS_ERR_OR_NULL(ion_handle)) {
-		DRM_ERROR("Unable to import dmabuf.  Error number = %d\n",
-			(int)PTR_ERR(ion_handle));
-		return -EINVAL;
-	}
-
-	ion_handle_get_size(kgsl_drm_ion_client, ion_handle, &size);
-
-	if (size == 0) {
-		ion_free(kgsl_drm_ion_client, ion_handle);
-		DRM_ERROR(
-		"cannot create GEM object from zero size ION buffer\n");
-		return -EINVAL;
-	}
-
-	obj = drm_gem_object_alloc(dev, size);
-
-	if (obj == NULL) {
-		ion_free(kgsl_drm_ion_client, ion_handle);
-		DRM_ERROR("Unable to allocate the GEM object\n");
-		return -ENOMEM;
-	}
-
-	ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
-	if (ret) {
-		ion_free(kgsl_drm_ion_client, ion_handle);
-		drm_gem_object_release(obj);
-		DRM_ERROR("Unable to initialize GEM object ret = %d\n", ret);
-		return ret;
-	}
-
-	priv = obj->driver_private;
-	priv->ion_handle = ion_handle;
-
-	priv->type = DRM_KGSL_GEM_TYPE_KMEM;
-	list_add(&priv->list, &kgsl_mem_list);
-
-	priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
-
-	priv->memdesc.pagetable = priv->pagetable;
-
-	sg_table = ion_sg_table(kgsl_drm_ion_client,
-		priv->ion_handle);
-	if (IS_ERR_OR_NULL(priv->ion_handle)) {
-		DRM_ERROR("Unable to get ION sg table\n");
-		ion_free(kgsl_drm_ion_client,
-			priv->ion_handle);
-		priv->ion_handle = NULL;
-		kgsl_mmu_putpagetable(priv->pagetable);
-		drm_gem_object_release(obj);
-		kfree(priv);
-		return -ENOMEM;
-	}
-
-	priv->memdesc.sg = sg_table->sgl;
-
-	/* Calculate the size of the memdesc from the sglist */
-
-	priv->memdesc.sglen = 0;
-
-	for (s = priv->memdesc.sg; s != NULL; s = sg_next(s)) {
-		priv->memdesc.size += s->length;
-		priv->memdesc.sglen++;
-	}
-
-	ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc,
-		GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-	if (ret) {
-		DRM_ERROR("kgsl_mmu_map failed.  ret = %d\n", ret);
-		ion_free(kgsl_drm_ion_client,
-			priv->ion_handle);
-		priv->ion_handle = NULL;
-		kgsl_mmu_putpagetable(priv->pagetable);
-		drm_gem_object_release(obj);
-		kfree(priv);
-		return -ENOMEM;
-	}
-
-	priv->bufs[0].offset = 0;
-	priv->bufs[0].gpuaddr = priv->memdesc.gpuaddr;
-	priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED;
-
-	args->handle = handle;
-	return 0;
-}
-
-int
-kgsl_gem_get_ion_fd_ioctl(struct drm_device *dev, void *data,
-			struct drm_file *file_priv)
-{
-	struct drm_kgsl_gem_get_ion_fd *args = data;
-	struct drm_gem_object *obj;
-	struct drm_kgsl_gem_object *priv;
-	int ret = 0;
-
-	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
-
-	if (obj == NULL) {
-		DRM_ERROR("Invalid GEM handle %x\n", args->handle);
-		return -EBADF;
-	}
-
-	mutex_lock(&dev->struct_mutex);
-	priv = obj->driver_private;
-
-	if (TYPE_IS_FD(priv->type))
-		ret = -EINVAL;
-	else if (TYPE_IS_PMEM(priv->type) || TYPE_IS_MEM(priv->type)) {
-		if (priv->ion_handle) {
-			args->ion_fd = ion_share_dma_buf(
-				kgsl_drm_ion_client, priv->ion_handle);
-			if (args->ion_fd < 0) {
-				DRM_ERROR(
-				"Could not share ion buffer. Error = %d\n",
-					args->ion_fd);
-				ret = -EINVAL;
-			}
-		} else {
-			DRM_ERROR("GEM object has no ion memory allocated.\n");
-			ret = -EINVAL;
-		}
-	}
-
-	drm_gem_object_unreference(obj);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret;
-}
-
-int
 kgsl_gem_setmemtype_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
@@ -782,9 +685,13 @@
 
 	if (ret) {
 		DRM_ERROR("Unable to allocate object memory\n");
+	} else if (!priv->mmap_offset) {
+		ret = kgsl_gem_create_mmap_offset(obj);
+		if (ret)
+			DRM_ERROR("Unable to create a mmap offset\n");
 	}
 
-	args->offset = 0;
+	args->offset = priv->mmap_offset;
 
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
@@ -796,7 +703,33 @@
 kgsl_gem_mmap_ioctl(struct drm_device *dev, void *data,
 			struct drm_file *file_priv)
 {
-	/* Ion is used for mmap at this time */
+	struct drm_kgsl_gem_mmap *args = data;
+	struct drm_gem_object *obj;
+	unsigned long addr;
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+	if (obj == NULL) {
+		DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+		return -EBADF;
+	}
+
+	down_write(&current->mm->mmap_sem);
+
+	addr = do_mmap(obj->filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+
+	up_write(&current->mm->mmap_sem);
+
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	if (IS_ERR((void *) addr))
+		return addr;
+
+	args->hostptr = (uint32_t) addr;
 	return 0;
 }
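
The ioctl is a kernel-side convenience wrapper around the same mapping; the hedged userspace equivalent, assuming drm_fd and mmap_offset were obtained from the DRM device node and the alloc/bufinfo ioctls:

	#include <stdio.h>
	#include <sys/mman.h>

	void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
			 MAP_SHARED, drm_fd, mmap_offset);
	if (ptr == MAP_FAILED)
		perror("mmap");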
 
@@ -829,6 +762,18 @@
 		return ret;
 	}
 
+	if (priv->mmap_offset == 0) {
+		ret = kgsl_gem_create_mmap_offset(obj);
+		if (ret) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+	}
+
+	args->offset = priv->mmap_offset;
+	args->phys = priv->memdesc.physaddr;
+
 	drm_gem_object_unreference(obj);
 	mutex_unlock(&dev->struct_mutex);
 
@@ -1012,6 +957,122 @@
 	}
 }
 
+static struct vm_operations_struct kgsl_gem_kmem_vm_ops = {
+	.fault = kgsl_gem_kmem_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+static struct vm_operations_struct kgsl_gem_phys_vm_ops = {
+	.fault = kgsl_gem_phys_fault,
+	.open = drm_gem_vm_open,
+	.close = drm_gem_vm_close,
+};
+
+/* This is a clone of the standard drm_gem_mmap function modified to allow
+   us to properly map KMEM regions as well as the PMEM regions */
+
+int msm_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+	struct drm_file *priv = filp->private_data;
+	struct drm_device *dev = priv->minor->dev;
+	struct drm_gem_mm *mm = dev->mm_private;
+	struct drm_local_map *map = NULL;
+	struct drm_gem_object *obj;
+	struct drm_hash_item *hash;
+	struct drm_kgsl_gem_object *gpriv;
+	int ret = 0;
+
+	mutex_lock(&dev->struct_mutex);
+
+	if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+		mutex_unlock(&dev->struct_mutex);
+		return drm_mmap(filp, vma);
+	}
+
+	map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+	if (!map ||
+	    ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+		ret =  -EPERM;
+		goto out_unlock;
+	}
+
+	/* Check for valid size. */
+	if (map->size < vma->vm_end - vma->vm_start) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	obj = map->handle;
+
+	gpriv = obj->driver_private;
+
+	/* VM_PFNMAP is only for memory that doesn't use struct page
+	 * in other words, not "normal" memory.  If you try to use it
+	 * with "normal" memory then the mappings don't get flushed. */
+
+	if (TYPE_IS_MEM(gpriv->type)) {
+		vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+		vma->vm_ops = &kgsl_gem_kmem_vm_ops;
+	} else {
+		vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP |
+			VM_DONTEXPAND;
+		vma->vm_ops = &kgsl_gem_phys_vm_ops;
+	}
+
+	vma->vm_private_data = map->handle;
+
+
+	/* Take care of requested caching policy */
+	if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM ||
+	    gpriv->type & DRM_KGSL_GEM_CACHE_MASK) {
+		if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACKWA)
+			vma->vm_page_prot =
+			pgprot_writebackwacache(vma->vm_page_prot);
+		else if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACK)
+				vma->vm_page_prot =
+				pgprot_writebackcache(vma->vm_page_prot);
+		else if (gpriv->type & DRM_KGSL_GEM_CACHE_WTHROUGH)
+				vma->vm_page_prot =
+				pgprot_writethroughcache(vma->vm_page_prot);
+		else
+			vma->vm_page_prot =
+			pgprot_writecombine(vma->vm_page_prot);
+	} else {
+		if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM_NOCACHE)
+			vma->vm_page_prot =
+			pgprot_noncached(vma->vm_page_prot);
+		else
+			/* default pmem is WC */
+			vma->vm_page_prot =
+			pgprot_writecombine(vma->vm_page_prot);
+	}
+
+	/* flush out existing KMEM cached mappings if new ones are
+	 * of uncached type */
+	if (IS_MEM_UNCACHED(gpriv->type))
+		kgsl_cache_range_op(&gpriv->memdesc,
+				    KGSL_CACHE_OP_FLUSH);
+
+	/* Add the other memory types here */
+
+	/* Take a ref for this mapping of the object, so that the fault
+	 * handler can dereference the mmap offset's pointer to the object.
+	 * This reference is cleaned up by the corresponding vm_close
+	 * (which should happen whether the vma was created by this call, or
+	 * by a vm_open due to mremap or partial unmap or whatever).
+	 */
+	drm_gem_object_reference(obj);
+
+	vma->vm_file = filp;	/* Needed for drm_vm_open() */
+	drm_vm_open_locked(vma);
+
+out_unlock:
+	mutex_unlock(&dev->struct_mutex);
+
+	return ret;
+}
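
The page-protection chain above condenses to this mapping, checked in order (DRM_KGSL_GEM_CACHE_* prefixes dropped for brevity):

	/*  cached/KMEM objects:  WBACKWA   -> pgprot_writebackwacache()
	 *                        WBACK     -> pgprot_writebackcache()
	 *                        WTHROUGH  -> pgprot_writethroughcache()
	 *                        otherwise -> pgprot_writecombine()
	 *  uncached objects:     KMEM_NOCACHE -> pgprot_noncached()
	 *                        PMEM default -> pgprot_writecombine() */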
+
 void
 cleanup_fence(struct drm_kgsl_gem_object_fence *fence, int check_waiting)
 {
@@ -1373,9 +1434,6 @@
 	DRM_IOCTL_DEF_DRV(KGSL_GEM_ALLOC, kgsl_gem_alloc_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(KGSL_GEM_MMAP, kgsl_gem_mmap_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(KGSL_GEM_GET_BUFINFO, kgsl_gem_get_bufinfo_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(KGSL_GEM_GET_ION_FD, kgsl_gem_get_ion_fd_ioctl, 0),
-	DRM_IOCTL_DEF_DRV(KGSL_GEM_CREATE_FROM_ION,
-		kgsl_gem_create_from_ion_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_BUFCOUNT,
 		      kgsl_gem_set_bufcount_ioctl, 0),
 	DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_ACTIVE, kgsl_gem_set_active_ioctl, 0),
@@ -1389,22 +1447,28 @@
 		      DRM_MASTER),
 };
 
-static const struct file_operations kgsl_drm_driver_fops = {
-	.owner = THIS_MODULE,
-	.open = drm_open,
-	.release = drm_release,
-	.unlocked_ioctl = drm_ioctl,
-	.mmap = drm_gem_mmap,
-	.poll = drm_poll,
-	.fasync = drm_fasync,
-};
-
 static struct drm_driver driver = {
 	.driver_features = DRIVER_GEM,
+	.load = kgsl_drm_load,
+	.unload = kgsl_drm_unload,
+	.preclose = kgsl_drm_preclose,
+	.suspend = kgsl_drm_suspend,
+	.resume = kgsl_drm_resume,
+	.reclaim_buffers = drm_core_reclaim_buffers,
 	.gem_init_object = kgsl_gem_init_object,
 	.gem_free_object = kgsl_gem_free_object,
 	.ioctls = kgsl_drm_ioctls,
-	.fops = &kgsl_drm_driver_fops,
+
+	.fops = {
+		 .owner = THIS_MODULE,
+		 .open = drm_open,
+		 .release = drm_release,
+		 .unlocked_ioctl = drm_ioctl,
+		 .mmap = msm_drm_gem_mmap,
+		 .poll = drm_poll,
+		 .fasync = drm_fasync,
+		 },
+
 	.name = DRIVER_NAME,
 	.desc = DRIVER_DESC,
 	.date = DRIVER_DATE,
@@ -1433,24 +1497,11 @@
 		gem_buf_fence[i].fence_id = ENTRY_EMPTY;
 	}
 
-	/* Create ION Client */
-	kgsl_drm_ion_client = msm_ion_client_create(
-			0xffffffff, "kgsl_drm");
-	if (!kgsl_drm_ion_client) {
-		DRM_ERROR("Unable to create ION client\n");
-		return -ENOMEM;
-	}
-
 	return drm_platform_init(&driver, dev);
 }
 
 void kgsl_drm_exit(void)
 {
 	kgsl_drm_inited = DRM_KGSL_NOT_INITED;
-
-	if (kgsl_drm_ion_client)
-		ion_client_destroy(kgsl_drm_ion_client);
-	kgsl_drm_ion_client = NULL;
-
 	drm_platform_exit(&driver, driver.kdriver.platform_device);
 }
diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c
index 5cc0dff..8f28505 100644
--- a/drivers/gpu/msm/kgsl_gpummu.c
+++ b/drivers/gpu/msm/kgsl_gpummu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -19,11 +19,9 @@
 
 #include "kgsl.h"
 #include "kgsl_mmu.h"
-#include "kgsl_gpummu.h"
 #include "kgsl_device.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_trace.h"
-#include "adreno.h"
 
 #define KGSL_PAGETABLE_SIZE \
 	ALIGN(KGSL_PAGETABLE_ENTRIES(CONFIG_MSM_KGSL_PAGE_TABLE_SIZE) * \
@@ -163,7 +161,7 @@
 }
 
 static void *
-_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, phys_addr_t *physaddr)
+_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr)
 {
 	struct kgsl_ptpool_chunk *chunk;
 
@@ -229,7 +227,7 @@
  */
 
 static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool,
-				phys_addr_t *physaddr)
+				unsigned int *physaddr)
 {
 	void *addr = NULL;
 	int ret;
@@ -365,9 +363,10 @@
 	return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base);
 }
 
-void kgsl_gpummu_destroy_pagetable(struct kgsl_pagetable *pt)
+void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt)
 {
-	struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
+	struct kgsl_gpummu_pt *gpummu_pt = (struct kgsl_gpummu_pt *)
+						mmu_specific_pt;
 	kgsl_ptpool_free((struct kgsl_ptpool *)kgsl_driver.ptpool,
 				gpummu_pt->base.hostptr);
 
@@ -404,22 +403,11 @@
 {
 	unsigned int reg;
 	unsigned int ptbase;
-	struct kgsl_device *device;
-	struct adreno_device *adreno_dev;
-	unsigned int no_page_fault_log = 0;
 
-	device = mmu->device;
-	adreno_dev = ADRENO_DEVICE(device);
+	kgsl_regread(mmu->device, MH_MMU_PAGE_FAULT, &reg);
+	kgsl_regread(mmu->device, MH_MMU_PT_BASE, &ptbase);
 
-	kgsl_regread(device, MH_MMU_PAGE_FAULT, &reg);
-	kgsl_regread(device, MH_MMU_PT_BASE, &ptbase);
-
-
-	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
-		no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, reg);
-
-	if (!no_page_fault_log)
-		KGSL_MEM_CRIT(mmu->device,
+	KGSL_MEM_CRIT(mmu->device,
 			"mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n",
 			reg & ~(PAGE_SIZE - 1),
 			kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
@@ -528,11 +516,6 @@
 	 */
 	int status = 0;
 
-	mmu->pt_base = KGSL_PAGETABLE_BASE;
-	mmu->pt_size = CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
-	mmu->pt_per_process = KGSL_MMU_USE_PER_PROCESS_PT;
-	mmu->use_cpu_map = false;
-
 	/* sub-client MMU lookups require address translation */
 	if ((mmu->config & ~0x1) > 0) {
 		/*make sure virtual address range is a multiple of 64Kb */
@@ -589,7 +572,7 @@
 
 	if (mmu->defaultpagetable == NULL)
 		mmu->defaultpagetable =
-			kgsl_mmu_getpagetable(mmu, KGSL_MMU_GLOBAL_PT);
+			kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
 
 	/* Return error if the default pagetable doesn't exist */
 	if (mmu->defaultpagetable == NULL)
@@ -609,14 +592,14 @@
 }
 
 static int
-kgsl_gpummu_unmap(struct kgsl_pagetable *pt,
+kgsl_gpummu_unmap(void *mmu_specific_pt,
 		struct kgsl_memdesc *memdesc,
 		unsigned int *tlb_flags)
 {
 	unsigned int numpages;
 	unsigned int pte, ptefirst, ptelast, superpte;
 	unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
-	struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
+	struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt;
 
 	/* All GPU addresses as assigned are page aligned, but some
 	   functions purturb the gpuaddr with an offset, so apply the
@@ -658,13 +641,13 @@
 GSL_TLBFLUSH_FILTER_ISDIRTY((_p) / GSL_PT_SUPER_PTE))
 
 static int
-kgsl_gpummu_map(struct kgsl_pagetable *pt,
+kgsl_gpummu_map(void *mmu_specific_pt,
 		struct kgsl_memdesc *memdesc,
 		unsigned int protflags,
 		unsigned int *tlb_flags)
 {
 	unsigned int pte;
-	struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
+	struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt;
 	struct scatterlist *s;
 	int flushtlb = 0;
 	int i;
diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h
index 1753aff..99e7d5f 100644
--- a/drivers/gpu/msm/kgsl_gpummu.h
+++ b/drivers/gpu/msm/kgsl_gpummu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -57,7 +57,7 @@
 	int dynamic;
 
 	void *data;
-	phys_addr_t phys;
+	unsigned int phys;
 
 	unsigned long *bitmap;
 	struct list_head list;
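
A note on the phys_addr_t revert above: without CONFIG_ARM_LPAE both types are 32 bits wide, so on msm8960 the change is lossless; under LPAE, phys_addr_t widens to 64 bits and unsigned int would truncate addresses above 4GB. The assumption can be made explicit in an init path:

	/* holds on non-LPAE builds such as msm8960: */
	BUILD_BUG_ON(sizeof(phys_addr_t) != sizeof(unsigned int));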
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 739fcff..d285a5a 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -11,7 +11,6 @@
  *
  */
 #include <linux/types.h>
-#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/spinlock.h>
 #include <linux/genalloc.h>
@@ -21,7 +20,6 @@
 #include <mach/socinfo.h>
 #include <mach/msm_iomap.h>
 #include <mach/board.h>
-#include <mach/iommu_domains.h>
 #include <stddef.h>
 
 #include "kgsl.h"
@@ -33,33 +31,24 @@
 #include "adreno.h"
 #include "kgsl_trace.h"
 #include "z180.h"
-#include "kgsl_cffdump.h"
 
 
-static struct kgsl_iommu_register_list kgsl_iommuv0_reg[KGSL_IOMMU_REG_MAX] = {
+static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
 	{ 0, 0, 0 },				/* GLOBAL_BASE */
 	{ 0x10, 0x0003FFFF, 14 },		/* TTBR0 */
 	{ 0x14, 0x0003FFFF, 14 },		/* TTBR1 */
 	{ 0x20, 0, 0 },				/* FSR */
 	{ 0x800, 0, 0 },			/* TLBIALL */
 	{ 0x820, 0, 0 },			/* RESUME */
-	{ 0x03C, 0, 0 },			/* TLBLKCR */
-	{ 0x818, 0, 0 },			/* V2PUR */
-	{ 0x2C, 0, 0 },                         /* FSYNR0 */
-	{ 0x2C, 0, 0 },                         /* FSYNR0 */
 };
 
-static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
+static struct kgsl_iommu_register_list kgsl_iommuv2_reg[KGSL_IOMMU_REG_MAX] = {
 	{ 0, 0, 0 },				/* GLOBAL_BASE */
 	{ 0x20, 0x00FFFFFF, 14 },		/* TTBR0 */
 	{ 0x28, 0x00FFFFFF, 14 },		/* TTBR1 */
 	{ 0x58, 0, 0 },				/* FSR */
 	{ 0x618, 0, 0 },			/* TLBIALL */
-	{ 0x008, 0, 0 },			/* RESUME */
-	{ 0, 0, 0 },				/* TLBLKCR */
-	{ 0, 0, 0 },				/* V2PUR */
-	{ 0x68, 0, 0 },				/* FSYNR0 */
-	{ 0x6C, 0, 0 }				/* FSYNR1 */
+	{ 0x008, 0, 0 }				/* RESUME */
 };
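
Each entry in these tables is an {offset, mask, shift} triple describing where a field sits inside the register. For example, the pagetable base is extracted from TTBR0 with the same pattern used near the end of this file:

	pt_base = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit, ctx_id, TTBR0);
	pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
			iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);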
 
 struct remote_iommu_petersons_spinlock kgsl_iommu_sync_lock_vars;
@@ -111,172 +100,8 @@
 	return NULL;
 }
 
-/* These functions help find the nearest allocated memory entries on either side
- * of a faulting address. If we know the nearby allocations memory we can
- * get a better determination of what we think should have been located in the
- * faulting region
- */
-
-/*
- * A local structure to make it easy to store the interesting bits for the
- * memory entries on either side of the faulting address
- */
-
-struct _mem_entry {
-	unsigned int gpuaddr;
-	unsigned int size;
-	unsigned int flags;
-	unsigned int priv;
-	pid_t pid;
-};
-
-/*
- * Find the closest alloated memory block with an smaller GPU address then the
- * given address
- */
-
-static void _prev_entry(struct kgsl_process_private *priv,
-	unsigned int faultaddr, struct _mem_entry *ret)
-{
-	struct rb_node *node;
-	struct kgsl_mem_entry *entry;
-
-	for (node = rb_first(&priv->mem_rb); node; ) {
-		entry = rb_entry(node, struct kgsl_mem_entry, node);
-
-		if (entry->memdesc.gpuaddr > faultaddr)
-			break;
-
-		/*
-		 * If this is closer to the faulting address, then copy
-		 * the entry
-		 */
-
-		if (entry->memdesc.gpuaddr > ret->gpuaddr) {
-			ret->gpuaddr = entry->memdesc.gpuaddr;
-			ret->size = entry->memdesc.size;
-			ret->flags = entry->memdesc.flags;
-			ret->priv = entry->memdesc.priv;
-			ret->pid = priv->pid;
-		}
-
-		node = rb_next(&entry->node);
-	}
-}
-
-/*
- * Find the closest alloated memory block with a greater starting GPU address
- * then the given address
- */
-
-static void _next_entry(struct kgsl_process_private *priv,
-	unsigned int faultaddr, struct _mem_entry *ret)
-{
-	struct rb_node *node;
-	struct kgsl_mem_entry *entry;
-
-	for (node = rb_last(&priv->mem_rb); node; ) {
-		entry = rb_entry(node, struct kgsl_mem_entry, node);
-
-		if (entry->memdesc.gpuaddr < faultaddr)
-			break;
-
-		/*
-		 * If this is closer to the faulting address, then copy
-		 * the entry
-		 */
-
-		if (entry->memdesc.gpuaddr < ret->gpuaddr) {
-			ret->gpuaddr = entry->memdesc.gpuaddr;
-			ret->size = entry->memdesc.size;
-			ret->flags = entry->memdesc.flags;
-			ret->priv = entry->memdesc.priv;
-			ret->pid = priv->pid;
-		}
-
-		node = rb_prev(&entry->node);
-	}
-}
-
-static void _find_mem_entries(struct kgsl_mmu *mmu, unsigned int faultaddr,
-	unsigned int ptbase, struct _mem_entry *preventry,
-	struct _mem_entry *nextentry)
-{
-	struct kgsl_process_private *private;
-	int id = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase);
-
-	memset(preventry, 0, sizeof(*preventry));
-	memset(nextentry, 0, sizeof(*nextentry));
-
-	/* Set the maximum possible size as an initial value */
-	nextentry->gpuaddr = 0xFFFFFFFF;
-
-	mutex_lock(&kgsl_driver.process_mutex);
-
-	list_for_each_entry(private, &kgsl_driver.process_list, list) {
-
-		if (private->pagetable->name != id)
-			continue;
-
-		spin_lock(&private->mem_lock);
-		_prev_entry(private, faultaddr, preventry);
-		_next_entry(private, faultaddr, nextentry);
-		spin_unlock(&private->mem_lock);
-	}
-
-	mutex_unlock(&kgsl_driver.process_mutex);
-}
-
-static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry)
-{
-	char name[32];
-	memset(name, 0, sizeof(name));
-
-	kgsl_get_memory_usage(name, sizeof(name) - 1, entry->flags);
-
-	KGSL_LOG_DUMP(device,
-		"[%8.8X - %8.8X] %s (pid = %d) (%s)\n",
-		entry->gpuaddr,
-		entry->gpuaddr + entry->size,
-		entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "",
-		entry->pid, name);
-}
-
-static void _check_if_freed(struct kgsl_iommu_device *iommu_dev,
-	unsigned long addr, unsigned int pid)
-{
-	void *base = kgsl_driver.memfree_hist.base_hist_rb;
-	struct kgsl_memfree_hist_elem *wptr;
-	struct kgsl_memfree_hist_elem *p;
-
-	mutex_lock(&kgsl_driver.memfree_hist_mutex);
-	wptr = kgsl_driver.memfree_hist.wptr;
-	p = wptr;
-	for (;;) {
-		if (p->size && p->pid == pid)
-			if (addr >= p->gpuaddr &&
-				addr < (p->gpuaddr + p->size)) {
-
-				KGSL_LOG_DUMP(iommu_dev->kgsldev,
-					"---- premature free ----\n");
-				KGSL_LOG_DUMP(iommu_dev->kgsldev,
-					"[%8.8X-%8.8X] was already freed by pid %d\n",
-					p->gpuaddr,
-					p->gpuaddr + p->size,
-					p->pid);
-			}
-		p++;
-		if ((void *)p >= base + kgsl_driver.memfree_hist.size)
-			p = (struct kgsl_memfree_hist_elem *) base;
-
-		if (p == kgsl_driver.memfree_hist.wptr)
-			break;
-	}
-	mutex_unlock(&kgsl_driver.memfree_hist_mutex);
-}
-
 static int kgsl_iommu_fault_handler(struct iommu_domain *domain,
 	struct device *dev, unsigned long addr, int flags, void *token)
 {
 	int ret = 0;
 	struct kgsl_mmu *mmu;
@@ -284,10 +109,6 @@
 	struct kgsl_iommu_unit *iommu_unit;
 	struct kgsl_iommu_device *iommu_dev;
 	unsigned int ptbase, fsr;
-	unsigned int pid;
-	struct _mem_entry prev, next;
-	unsigned int fsynr0, fsynr1;
-	int write;
 	struct kgsl_device *device;
 	struct adreno_device *adreno_dev;
 	unsigned int no_page_fault_log = 0;
@@ -314,25 +135,6 @@
 
 	fsr = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
 		iommu_dev->ctx_id, FSR);
-	fsynr0 = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
-		iommu_dev->ctx_id, FSYNR0);
-	fsynr1 = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
-		iommu_dev->ctx_id, FSYNR1);
-
-	if (msm_soc_version_supports_iommu_v0())
-		write = ((fsynr1 & (KGSL_IOMMU_FSYNR1_AWRITE_MASK <<
-			KGSL_IOMMU_FSYNR1_AWRITE_SHIFT)) ? 1 : 0);
-	else
-		write = ((fsynr0 & (KGSL_IOMMU_V1_FSYNR0_WNR_MASK <<
-			KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT)) ? 1 : 0);
-
-	pid = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase);
-	KGSL_MEM_CRIT(iommu_dev->kgsldev,
-		"GPU PAGE FAULT: addr = %lX pid = %d\n", addr, pid);
-	KGSL_MEM_CRIT(iommu_dev->kgsldev,
-		"context = %d FSR = %X FSYNR0 = %X FSYNR1 = %X(%s fault)\n",
-		iommu_dev->ctx_id, fsr, fsynr0, fsynr1,
-		write ? "write" : "read");
 
 	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
 		no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr);
@@ -343,25 +145,6 @@
 			addr, kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase));
 		KGSL_MEM_CRIT(iommu_dev->kgsldev, "context = %d FSR = %X\n",
 			iommu_dev->ctx_id, fsr);
-
-		_check_if_freed(iommu_dev, addr, pid);
-
-		KGSL_LOG_DUMP(iommu_dev->kgsldev, "---- nearby memory ----\n");
-
-		_find_mem_entries(mmu, addr, ptbase, &prev, &next);
-
-		if (prev.gpuaddr)
-			_print_entry(iommu_dev->kgsldev, &prev);
-		else
-			KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n");
-
-		KGSL_LOG_DUMP(iommu_dev->kgsldev, " <- fault @ %8.8lX\n", addr);
-
-		if (next.gpuaddr != 0xFFFFFFFF)
-			_print_entry(iommu_dev->kgsldev, &next);
-		else
-			KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n");
-
 	}
 
 	mmu->fault = 1;
@@ -377,15 +160,14 @@
 		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, eoptimestamp));
 
 	/*
-	 * Store pagefault's timestamp in adreno context,
-	 * this information will be used in GFT
+	 * Store the pagefault's timestamp and ib1 addr in the context;
+	 * this information is used in GFT
 	 */
 	curr_context->pagefault = 1;
 	curr_context->pagefault_ts = curr_global_ts;
 
 	trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
-			kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
-			write ? "write" : "read");
+			kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), 0);
 
 	/*
 	 * We do not want the h/w to resume fetching data from an iommu unit
@@ -605,9 +387,9 @@
  *
  * Return - void
  */
-static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pt)
+static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt)
 {
-	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
 	if (iommu_pt->domain)
 		iommu_domain_free(iommu_pt->domain);
 	kfree(iommu_pt);
@@ -622,39 +404,28 @@
  */
 void *kgsl_iommu_create_pagetable(void)
 {
-	int domain_num;
 	struct kgsl_iommu_pt *iommu_pt;
 
-	struct msm_iova_partition kgsl_partition = {
-		.start = 0,
-		.size = 0xFFFFFFFF,
-	};
-	struct msm_iova_layout kgsl_layout = {
-		.partitions = &kgsl_partition,
-		.npartitions = 1,
-		.client_name = "kgsl",
-		.domain_flags = 0,
-	};
-
 	iommu_pt = kzalloc(sizeof(struct kgsl_iommu_pt), GFP_KERNEL);
 	if (!iommu_pt) {
 		KGSL_CORE_ERR("kzalloc(%d) failed\n",
 				sizeof(struct kgsl_iommu_pt));
 		return NULL;
 	}
-	/* L2 redirect is not stable on IOMMU v1 */
-	if (msm_soc_version_supports_iommu_v0())
-		kgsl_layout.domain_flags = MSM_IOMMU_DOMAIN_PT_CACHEABLE;
-
-	domain_num = msm_register_domain(&kgsl_layout);
-	if (domain_num >= 0) {
-		iommu_pt->domain = msm_get_iommu_domain(domain_num);
-		iommu_set_fault_handler(iommu_pt->domain,
-			kgsl_iommu_fault_handler, NULL);
-	} else {
+	/* L2 redirect is not stable on IOMMU v2 */
+	if (msm_soc_version_supports_iommu_v1())
+		iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
+					MSM_IOMMU_DOMAIN_PT_CACHEABLE);
+	else
+		iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
+					0);
+	if (!iommu_pt->domain) {
 		KGSL_CORE_ERR("Failed to create iommu domain\n");
 		kfree(iommu_pt);
 		return NULL;
+	} else {
+		iommu_set_fault_handler(iommu_pt->domain,
+			kgsl_iommu_fault_handler, NULL);
 	}
 
 	return iommu_pt;
@@ -772,23 +543,17 @@
 {
 	struct kgsl_iommu *iommu = mmu->priv;
 	struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[unit_id];
-	int i, j;
-	int found_ctx;
+	int i;
 
-	for (j = 0; j < KGSL_IOMMU_MAX_DEVS_PER_UNIT; j++) {
-		found_ctx = 0;
-		for (i = 0; i < data->iommu_ctx_count; i++) {
-			if (j == data->iommu_ctxs[i].ctx_id) {
-				found_ctx = 1;
-				break;
-			}
-		}
-		if (!found_ctx)
-			break;
-		if (!data->iommu_ctxs[i].iommu_ctx_name) {
-			KGSL_CORE_ERR("Context name invalid\n");
-			return -EINVAL;
-		}
+	if (data->iommu_ctx_count > KGSL_IOMMU_MAX_DEVS_PER_UNIT) {
+		KGSL_CORE_ERR("Too many iommu devices defined for an "
+				"IOMMU unit\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < data->iommu_ctx_count; i++) {
+		if (!data->iommu_ctxs[i].iommu_ctx_name)
+			continue;
 
 		iommu_unit->dev[iommu_unit->dev_count].dev =
 			msm_iommu_get_ctx(data->iommu_ctxs[i].iommu_ctx_name);
@@ -797,6 +562,12 @@
 			"device %s\n", data->iommu_ctxs[i].iommu_ctx_name);
 			return -EINVAL;
 		}
+		if (KGSL_IOMMU_CONTEXT_USER != data->iommu_ctxs[i].ctx_id &&
+			KGSL_IOMMU_CONTEXT_PRIV != data->iommu_ctxs[i].ctx_id) {
+			KGSL_CORE_ERR("Invalid context ID defined: %d\n",
+					data->iommu_ctxs[i].ctx_id);
+			return -EINVAL;
+		}
 		iommu_unit->dev[iommu_unit->dev_count].ctx_id =
 						data->iommu_ctxs[i].ctx_id;
 		iommu_unit->dev[iommu_unit->dev_count].kgsldev = mmu->device;
@@ -808,51 +579,6 @@
 
 		iommu_unit->dev_count++;
 	}
-	if (!j) {
-		KGSL_CORE_ERR("No ctxts initialized, user ctxt absent\n ");
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-/*
- * kgsl_iommu_start_sync_lock - Initialize some variables during MMU start up
- * for GPU CPU synchronization
- * @mmu - Pointer to mmu device
- *
- * Return - 0 on success else error code
- */
-static int kgsl_iommu_start_sync_lock(struct kgsl_mmu *mmu)
-{
-	struct kgsl_iommu *iommu = mmu->priv;
-	uint32_t lock_gpu_addr = 0;
-
-	if (KGSL_DEVICE_3D0 != mmu->device->id ||
-		!msm_soc_version_supports_iommu_v0() ||
-		!kgsl_mmu_is_perprocess(mmu) ||
-		iommu->sync_lock_vars)
-		return 0;
-
-	if (!(mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC)) {
-		KGSL_DRV_ERR(mmu->device,
-		"The GPU microcode does not support IOMMUv1 sync opcodes\n");
-		return -ENXIO;
-	}
-	/* Store Lock variables GPU address  */
-	lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr +
-			iommu->sync_lock_offset);
-
-	kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
-		(offsetof(struct remote_iommu_petersons_spinlock,
-			flag[PROC_APPS])));
-	kgsl_iommu_sync_lock_vars.flag[PROC_GPU] = (lock_gpu_addr +
-		(offsetof(struct remote_iommu_petersons_spinlock,
-			flag[PROC_GPU])));
-	kgsl_iommu_sync_lock_vars.turn = (lock_gpu_addr +
-		(offsetof(struct remote_iommu_petersons_spinlock, turn)));
-
-	iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
 
 	return 0;
 }
@@ -865,30 +591,21 @@
  */
 static int kgsl_iommu_init_sync_lock(struct kgsl_mmu *mmu)
 {
-	struct kgsl_iommu *iommu = mmu->priv;
+	struct kgsl_iommu *iommu = mmu->device->mmu.priv;
 	int status = 0;
+	struct kgsl_pagetable *pagetable = NULL;
+	uint32_t lock_gpu_addr = 0;
 	uint32_t lock_phy_addr = 0;
 	uint32_t page_offset = 0;
 
-	if (!msm_soc_version_supports_iommu_v0() ||
-		!kgsl_mmu_is_perprocess(mmu))
-		return status;
+	iommu->sync_lock_initialized = 0;
 
-	/*
-	 * For 2D devices cpu side sync lock is required. For 3D device,
-	 * since we only have a single 3D core and we always ensure that
-	 * 3D core is idle while writing to IOMMU register using CPU this
-	 * lock is not required
-	 */
-	if (KGSL_DEVICE_2D0 == mmu->device->id ||
-		KGSL_DEVICE_2D1 == mmu->device->id) {
-		return status;
+	if (!(mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC)) {
+		KGSL_DRV_ERR(mmu->device,
+		"The GPU microcode does not support IOMMUv1 sync opcodes\n");
+		return -ENXIO;
 	}
 
-	/* Return if already initialized */
-	if (iommu->sync_lock_initialized)
-		return status;
-
 	/* Get the physical address of the Lock variables */
 	lock_phy_addr = (msm_iommu_lock_initialize()
 			- MSM_SHARED_RAM_BASE + msm_shared_ram_phys);
@@ -903,7 +620,6 @@
 	page_offset = (lock_phy_addr & (PAGE_SIZE - 1));
 	lock_phy_addr = (lock_phy_addr & ~(PAGE_SIZE - 1));
 	iommu->sync_lock_desc.physaddr = (unsigned int)lock_phy_addr;
-	iommu->sync_lock_offset = page_offset;
 
 	iommu->sync_lock_desc.size =
 				PAGE_ALIGN(sizeof(kgsl_iommu_sync_lock_vars));
@@ -914,6 +630,35 @@
 	if (status)
 		return status;
 
+	/* Map Lock variables to GPU pagetable */
+	iommu->sync_lock_desc.priv |= KGSL_MEMDESC_GLOBAL;
+
+	pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
+				mmu->defaultpagetable;
+
+	status = kgsl_mmu_map(pagetable, &iommu->sync_lock_desc,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+
+	if (status) {
+		kgsl_mmu_unmap(pagetable, &iommu->sync_lock_desc);
+		iommu->sync_lock_desc.priv &= ~KGSL_MEMDESC_GLOBAL;
+		return status;
+	}
+
+	/* Store Lock variables GPU address  */
+	lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr + page_offset);
+
+	kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
+		(offsetof(struct remote_iommu_petersons_spinlock,
+			flag[PROC_APPS])));
+	kgsl_iommu_sync_lock_vars.flag[PROC_GPU] = (lock_gpu_addr +
+		(offsetof(struct remote_iommu_petersons_spinlock,
+			flag[PROC_GPU])));
+	kgsl_iommu_sync_lock_vars.turn = (lock_gpu_addr +
+		(offsetof(struct remote_iommu_petersons_spinlock, turn)));
+
+	iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
+
+	/* Flag the sync lock as initialized */
 	iommu->sync_lock_initialized = 1;
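
The remote lock initialized here implements Peterson's algorithm between two contenders, PROC_APPS (the CPU) and PROC_GPU. A minimal illustrative sketch of the acquire side, with memory barriers omitted for brevity (a real implementation must order the flag/turn stores before the spin):

	static void peterson_lock(volatile unsigned int flag[2],
				  volatile unsigned int *turn, int me)
	{
		int other = 1 - me;

		flag[me] = 1;
		*turn = other;
		while (flag[other] && *turn == other)
			; /* spin until the other side yields */
	}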
 
@@ -1172,75 +917,6 @@
 	}
 }
 
-/*
- * kgsl_iommu_setup_regs - map iommu registers into a pagetable
- * @mmu: Pointer to mmu structure
- * @pt: the pagetable
- *
- * To do pagetable switches from the GPU command stream, the IOMMU
- * registers need to be mapped into the GPU's pagetable. This function
- * is used differently on different targets. On 8960, the registers
- * are mapped into every pagetable during kgsl_setup_pt(). On
- * all other targets, the registers are mapped only into the second
- * context bank.
- *
- * Return - 0 on success else error code
- */
-static int kgsl_iommu_setup_regs(struct kgsl_mmu *mmu,
-				    struct kgsl_pagetable *pt)
-{
-	int status;
-	int i = 0;
-	struct kgsl_iommu *iommu = mmu->priv;
-
-	if (!msm_soc_version_supports_iommu_v0())
-		return 0;
-
-	for (i = 0; i < iommu->unit_count; i++) {
-		status = kgsl_mmu_map_global(pt,
-				&(iommu->iommu_units[i].reg_map));
-		if (status)
-			goto err;
-	}
-
-	/* Map Lock variables to GPU pagetable */
-	if (iommu->sync_lock_initialized) {
-		status = kgsl_mmu_map_global(pt, &iommu->sync_lock_desc);
-		if (status)
-			goto err;
-	}
-
-	return 0;
-err:
-	for (i--; i >= 0; i--)
-		kgsl_mmu_unmap(pt,
-				&(iommu->iommu_units[i].reg_map));
-
-	return status;
-}
-
-/*
- * kgsl_iommu_cleanup_regs - unmap iommu registers from a pagetable
- * @mmu: Pointer to mmu structure
- * @pt: the pagetable
- *
- * Removes mappings created by kgsl_iommu_setup_regs().
- *
- * Return - 0 on success else error code
- */
-static void kgsl_iommu_cleanup_regs(struct kgsl_mmu *mmu,
-					struct kgsl_pagetable *pt)
-{
-	struct kgsl_iommu *iommu = mmu->priv;
-	int i;
-	for (i = 0; i < iommu->unit_count; i++)
-		kgsl_mmu_unmap(pt, &(iommu->iommu_units[i].reg_map));
-
-	if (iommu->sync_lock_desc.gpuaddr)
-		kgsl_mmu_unmap(pt, &iommu->sync_lock_desc);
-}
-
-
 static int kgsl_iommu_init(struct kgsl_mmu *mmu)
 {
 	/*
@@ -1265,45 +941,16 @@
 	status = kgsl_set_register_map(mmu);
 	if (status)
 		goto done;
-	status = kgsl_iommu_init_sync_lock(mmu);
-	if (status)
-		goto done;
 
-	/* We presently do not support per-process for IOMMU-v1 */
-	mmu->pt_per_process = KGSL_MMU_USE_PER_PROCESS_PT &&
-				msm_soc_version_supports_iommu_v0();
+	iommu->iommu_reg_list = kgsl_iommuv1_reg;
+	iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
 
-	/*
-	 * For IOMMU per-process pagetables, the allocatable range
-	 * and the kernel global range must both be outside
-	 * the userspace address range. There is a 1Mb gap
-	 * between these address ranges to make overrun
-	 * detection easier.
-	 * For the shared pagetable case use 2GB and because
-	 * mirroring the CPU address space is not possible and
-	 * we're better off with extra room.
-	 */
-	if (mmu->pt_per_process) {
-		mmu->pt_base = PAGE_OFFSET;
-		mmu->pt_size = KGSL_IOMMU_GLOBAL_MEM_BASE
-				- kgsl_mmu_get_base_addr(mmu) - SZ_1M;
-		mmu->use_cpu_map = true;
-	} else {
-		mmu->pt_base = KGSL_PAGETABLE_BASE;
-		mmu->pt_size = SZ_2G;
-		mmu->use_cpu_map = false;
-	}
-
-
-	iommu->iommu_reg_list = kgsl_iommuv0_reg;
-	iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V0;
-
-	if (msm_soc_version_supports_iommu_v0()) {
-		iommu->iommu_reg_list = kgsl_iommuv0_reg;
-		iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V0;
-	} else {
+	if (msm_soc_version_supports_iommu_v1()) {
 		iommu->iommu_reg_list = kgsl_iommuv1_reg;
 		iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
+	} else {
+		iommu->iommu_reg_list = kgsl_iommuv2_reg;
+		iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V2;
 	}
 
 	/* A nop is required in an indirect buffer when switching
@@ -1312,15 +959,6 @@
 				KGSL_IOMMU_SETSTATE_NOP_OFFSET,
 				cp_nop_packet(1));
 
-	if (cpu_is_msm8960()) {
-		/*
-		 * 8960 doesn't have a second context bank, so the IOMMU
-		 * registers must be mapped into every pagetable.
-		 */
-		iommu_ops.mmu_setup_pt = kgsl_iommu_setup_regs;
-		iommu_ops.mmu_cleanup_pt = kgsl_iommu_cleanup_regs;
-	}
-
 	dev_info(mmu->device->dev, "|%s| MMU type set for device is IOMMU\n",
 			__func__);
 done:
@@ -1343,31 +981,51 @@
 static int kgsl_iommu_setup_defaultpagetable(struct kgsl_mmu *mmu)
 {
 	int status = 0;
+	int i = 0;
+	struct kgsl_iommu *iommu = mmu->priv;
+	struct kgsl_pagetable *pagetable = NULL;
 
 	/* If chip is not 8960 then we use the 2nd context bank for pagetable
 	 * switching on the 3D side for which a separate table is allocated */
-	if (!cpu_is_msm8960() && msm_soc_version_supports_iommu_v0()) {
+	if (!cpu_is_msm8960() && msm_soc_version_supports_iommu_v1()) {
 		mmu->priv_bank_table =
-			kgsl_mmu_getpagetable(mmu,
-					KGSL_MMU_PRIV_BANK_TABLE_NAME);
+			kgsl_mmu_getpagetable(KGSL_MMU_PRIV_BANK_TABLE_NAME);
 		if (mmu->priv_bank_table == NULL) {
 			status = -ENOMEM;
 			goto err;
 		}
-		status = kgsl_iommu_setup_regs(mmu, mmu->priv_bank_table);
-		if (status)
-			goto err;
 	}
-	mmu->defaultpagetable = kgsl_mmu_getpagetable(mmu, KGSL_MMU_GLOBAL_PT);
+	mmu->defaultpagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
 	/* Return error if the default pagetable doesn't exist */
 	if (mmu->defaultpagetable == NULL) {
 		status = -ENOMEM;
 		goto err;
 	}
+	pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
+				mmu->defaultpagetable;
+	/* Map the IOMMU registers into the default pagetable only */
+	if (msm_soc_version_supports_iommu_v1()) {
+		for (i = 0; i < iommu->unit_count; i++) {
+			iommu->iommu_units[i].reg_map.priv |=
+						KGSL_MEMDESC_GLOBAL;
+			status = kgsl_mmu_map(pagetable,
+				&(iommu->iommu_units[i].reg_map),
+				GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+			if (status) {
+				iommu->iommu_units[i].reg_map.priv &=
+							~KGSL_MEMDESC_GLOBAL;
+				goto err;
+			}
+		}
+	}
 	return status;
 err:
+	for (i--; i >= 0; i--) {
+		kgsl_mmu_unmap(pagetable,
+				&(iommu->iommu_units[i].reg_map));
+		iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMDESC_GLOBAL;
+	}
 	if (mmu->priv_bank_table) {
-		kgsl_iommu_cleanup_regs(mmu, mmu->priv_bank_table);
 		kgsl_mmu_putpagetable(mmu->priv_bank_table);
 		mmu->priv_bank_table = NULL;
 	}
@@ -1378,113 +1036,9 @@
 	return status;
 }
 
-/*
- * kgsl_iommu_lock_rb_in_tlb - Allocates tlb entries and locks the
- * virtual to physical address translation of ringbuffer for 3D
- * device into tlb.
- * @mmu - Pointer to mmu structure
- *
- * Return - void
- */
-static void kgsl_iommu_lock_rb_in_tlb(struct kgsl_mmu *mmu)
-{
-	struct kgsl_device *device = mmu->device;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb;
-	struct kgsl_iommu *iommu = mmu->priv;
-	unsigned int num_tlb_entries;
-	unsigned int tlblkcr = 0;
-	unsigned int v2pxx = 0;
-	unsigned int vaddr = 0;
-	int i, j, k, l;
-
-	if (!iommu->sync_lock_initialized)
-		return;
-
-	rb = &adreno_dev->ringbuffer;
-	num_tlb_entries = rb->buffer_desc.size / PAGE_SIZE;
-
-	for (i = 0; i < iommu->unit_count; i++) {
-		struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
-		for (j = 0; j < iommu_unit->dev_count; j++) {
-			tlblkcr = 0;
-			if (cpu_is_msm8960())
-				tlblkcr |= ((num_tlb_entries &
-					KGSL_IOMMU_TLBLKCR_FLOOR_MASK) <<
-					KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT);
-			else
-				tlblkcr |= (((num_tlb_entries *
-					iommu_unit->dev_count) &
-					KGSL_IOMMU_TLBLKCR_FLOOR_MASK) <<
-					KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT);
-			/* Do not invalidate locked entries on tlbiall flush */
-			tlblkcr	|= ((1 & KGSL_IOMMU_TLBLKCR_TLBIALLCFG_MASK)
-				<< KGSL_IOMMU_TLBLKCR_TLBIALLCFG_SHIFT);
-			tlblkcr	|= ((1 & KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_MASK)
-				<< KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_SHIFT);
-			tlblkcr	|= ((1 & KGSL_IOMMU_TLBLKCR_TLBIVAACFG_MASK)
-				<< KGSL_IOMMU_TLBLKCR_TLBIVAACFG_SHIFT);
-			/* Enable tlb locking */
-			tlblkcr |= ((1 & KGSL_IOMMU_TLBLKCR_LKE_MASK)
-				<< KGSL_IOMMU_TLBLKCR_LKE_SHIFT);
-			KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
-					iommu_unit->dev[j].ctx_id,
-					TLBLKCR, tlblkcr);
-		}
-		for (j = 0; j < iommu_unit->dev_count; j++) {
-			/* skip locking entries for private bank on 8960 */
-			if (cpu_is_msm8960() &&  KGSL_IOMMU_CONTEXT_PRIV == j)
-				continue;
-			/* Lock the ringbuffer virtual address into tlb */
-			vaddr = rb->buffer_desc.gpuaddr;
-			for (k = 0; k < num_tlb_entries; k++) {
-				v2pxx = 0;
-				v2pxx |= (((k + j * num_tlb_entries) &
-					KGSL_IOMMU_V2PXX_INDEX_MASK)
-					<< KGSL_IOMMU_V2PXX_INDEX_SHIFT);
-				v2pxx |= vaddr & (KGSL_IOMMU_V2PXX_VA_MASK <<
-						KGSL_IOMMU_V2PXX_VA_SHIFT);
-
-				KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
-						iommu_unit->dev[j].ctx_id,
-						V2PUR, v2pxx);
-				vaddr += PAGE_SIZE;
-				for (l = 0; l < iommu_unit->dev_count; l++) {
-					tlblkcr = KGSL_IOMMU_GET_CTX_REG(iommu,
-						iommu_unit,
-						iommu_unit->dev[l].ctx_id,
-						TLBLKCR);
-					mb();
-					tlblkcr &=
-					~(KGSL_IOMMU_TLBLKCR_VICTIM_MASK
-					<< KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT);
-					tlblkcr |= (((k + 1 +
-					(j * num_tlb_entries)) &
-					KGSL_IOMMU_TLBLKCR_VICTIM_MASK) <<
-					KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT);
-					KGSL_IOMMU_SET_CTX_REG(iommu,
-						iommu_unit,
-						iommu_unit->dev[l].ctx_id,
-						TLBLKCR, tlblkcr);
-				}
-			}
-		}
-		for (j = 0; j < iommu_unit->dev_count; j++) {
-			tlblkcr = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
-						iommu_unit->dev[j].ctx_id,
-						TLBLKCR);
-			mb();
-			/* Disable tlb locking */
-			tlblkcr &= ~(KGSL_IOMMU_TLBLKCR_LKE_MASK
-				<< KGSL_IOMMU_TLBLKCR_LKE_SHIFT);
-			KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
-				iommu_unit->dev[j].ctx_id, TLBLKCR, tlblkcr);
-		}
-	}
-}
-
 static int kgsl_iommu_start(struct kgsl_mmu *mmu)
 {
+	struct kgsl_device *device = mmu->device;
 	int status;
 	struct kgsl_iommu *iommu = mmu->priv;
 	int i, j;
@@ -1496,21 +1050,23 @@
 		status = kgsl_iommu_setup_defaultpagetable(mmu);
 		if (status)
 			return -ENOMEM;
+
+		/* Initialize the sync lock between GPU and CPU */
+		if (msm_soc_version_supports_iommu_v1() &&
+			(device->id == KGSL_DEVICE_3D0))
+				kgsl_iommu_init_sync_lock(mmu);
 	}
-	status = kgsl_iommu_start_sync_lock(mmu);
-	if (status)
-		return status;
 
 	/* We use the GPU MMU to control access to IOMMU registers on 8960 with
 	 * a225, hence we still keep the MMU active on 8960 */
-	if (cpu_is_msm8960() && KGSL_DEVICE_3D0 == mmu->device->id) {
+	if (cpu_is_msm8960()) {
 		struct kgsl_mh *mh = &(mmu->device->mh);
-		BUG_ON(iommu->iommu_units[0].reg_map.gpuaddr != 0 &&
-			mh->mpu_base > iommu->iommu_units[0].reg_map.gpuaddr);
 		kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000001);
-
 		kgsl_regwrite(mmu->device, MH_MMU_MPU_END,
-			mh->mpu_base + mh->mpu_range);
+			mh->mpu_base +
+			iommu->iommu_units[0].reg_map.gpuaddr);
+	} else {
+		kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
 	}
 
 	mmu->hwpagetable = mmu->defaultpagetable;
@@ -1535,7 +1091,6 @@
 	 * changing pagetables we can use this lsb value of the pagetable w/o
 	 * having to read it again
 	 */
-	msm_iommu_lock();
 	for (i = 0; i < iommu->unit_count; i++) {
 		struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
 		for (j = 0; j < iommu_unit->dev_count; j++) {
@@ -1546,13 +1101,6 @@
 						TTBR0));
 		}
 	}
-	kgsl_iommu_lock_rb_in_tlb(mmu);
-	msm_iommu_unlock();
-
-	/* For complete CFF */
-	kgsl_cffdump_setmem(mmu->setstate_memory.gpuaddr +
-				KGSL_IOMMU_SETSTATE_NOP_OFFSET,
-				cp_nop_packet(1), sizeof(unsigned int));
 
 	kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
 	mmu->flags |= KGSL_FLAGS_STARTED;
@@ -1566,13 +1114,13 @@
 }
 
 static int
-kgsl_iommu_unmap(struct kgsl_pagetable *pt,
+kgsl_iommu_unmap(void *mmu_specific_pt,
 		struct kgsl_memdesc *memdesc,
 		unsigned int *tlb_flags)
 {
 	int ret;
 	unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
-	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
 
 	/* All GPU addresses as assigned are page aligned, but some
 	   functions purturb the gpuaddr with an offset, so apply the
@@ -1589,58 +1137,43 @@
 			"with err: %d\n", iommu_pt->domain, gpuaddr,
 			range, ret);
 
+#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
 	/*
 	 * Flushing only required if per process pagetables are used. With
 	 * global case, flushing will happen inside iommu_map function
 	 */
-	if (!ret && kgsl_mmu_is_perprocess(pt->mmu))
+	if (!ret && msm_soc_version_supports_iommu_v1())
 		*tlb_flags = UINT_MAX;
+#endif
 	return 0;
 }
 
 static int
-kgsl_iommu_map(struct kgsl_pagetable *pt,
+kgsl_iommu_map(void *mmu_specific_pt,
 			struct kgsl_memdesc *memdesc,
 			unsigned int protflags,
 			unsigned int *tlb_flags)
 {
 	int ret;
 	unsigned int iommu_virt_addr;
-	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
 	int size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
 
 	BUG_ON(NULL == iommu_pt);
 
-	/* if there's a guard page, we'll map it read only below */
-	if ((protflags & IOMMU_WRITE) && kgsl_memdesc_has_guard_page(memdesc))
-			size -= PAGE_SIZE;
 
 	iommu_virt_addr = memdesc->gpuaddr;
 
 	ret = iommu_map_range(iommu_pt->domain, iommu_virt_addr, memdesc->sg,
-				size, protflags);
+				size, (IOMMU_READ | IOMMU_WRITE));
 	if (ret) {
-		KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %x) err: %d\n",
-			iommu_pt->domain, iommu_virt_addr, memdesc->sg, size,
-			protflags, ret);
+		KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) "
+				"failed with err: %d\n", iommu_pt->domain,
+				iommu_virt_addr, memdesc->sg, size,
+				(IOMMU_READ | IOMMU_WRITE), ret);
 		return ret;
 	}
-	if ((protflags & IOMMU_WRITE) && kgsl_memdesc_has_guard_page(memdesc)) {
-		struct scatterlist *sg = &memdesc->sg[memdesc->sglen - 1];
 
-		ret = iommu_map(iommu_pt->domain, iommu_virt_addr + size,
-				kgsl_get_sg_pa(sg), PAGE_SIZE,
-				protflags & ~IOMMU_WRITE);
-		if (ret) {
-			KGSL_CORE_ERR("iommu_map(%p, %x, %x, %x) err: %d\n",
-				iommu_pt->domain, iommu_virt_addr + size,
-				kgsl_get_sg_pa(sg), protflags & ~IOMMU_WRITE,
-				ret);
-			/* cleanup the partial mapping */
-			iommu_unmap_range(iommu_pt->domain, iommu_virt_addr,
-					  size);
-		}
-	}
 	return ret;
 }
 
@@ -1653,6 +1186,7 @@
 	 *
 	 *  call this with the global lock held
 	 */
+
 	if (mmu->flags & KGSL_FLAGS_STARTED) {
 		/* detach iommu attachment */
 		kgsl_detach_pagetable_iommu_domain(mmu);
@@ -1667,12 +1201,10 @@
 				for (j = 0; j < iommu_unit->dev_count; j++) {
 					if (iommu_unit->dev[j].fault) {
 						kgsl_iommu_enable_clk(mmu, j);
-						msm_iommu_lock();
 						KGSL_IOMMU_SET_CTX_REG(iommu,
 						iommu_unit,
 						iommu_unit->dev[j].ctx_id,
 						RESUME, 1);
-						msm_iommu_unlock();
 						iommu_unit->dev[j].fault = 0;
 					}
 				}
@@ -1690,28 +1222,22 @@
 {
 	struct kgsl_iommu *iommu = mmu->priv;
 	int i;
-
-	if (mmu->priv_bank_table != NULL) {
-		kgsl_iommu_cleanup_regs(mmu, mmu->priv_bank_table);
-		kgsl_mmu_putpagetable(mmu->priv_bank_table);
-	}
-
-	if (mmu->defaultpagetable != NULL)
-		kgsl_mmu_putpagetable(mmu->defaultpagetable);
-
 	for (i = 0; i < iommu->unit_count; i++) {
-		struct kgsl_memdesc *reg_map = &iommu->iommu_units[i].reg_map;
-
-		if (reg_map->hostptr)
-			iounmap(reg_map->hostptr);
-		kgsl_sg_free(reg_map->sg, reg_map->sglen);
-		reg_map->priv &= ~KGSL_MEMDESC_GLOBAL;
+		struct kgsl_pagetable *pagetable = (mmu->priv_bank_table ?
+			mmu->priv_bank_table : mmu->defaultpagetable);
+		if (iommu->iommu_units[i].reg_map.gpuaddr)
+			kgsl_mmu_unmap(pagetable,
+			&(iommu->iommu_units[i].reg_map));
+		if (iommu->iommu_units[i].reg_map.hostptr)
+			iounmap(iommu->iommu_units[i].reg_map.hostptr);
+		kgsl_sg_free(iommu->iommu_units[i].reg_map.sg,
+				iommu->iommu_units[i].reg_map.sglen);
 	}
-	/* clear IOMMU GPU CPU sync structures */
-	kgsl_sg_free(iommu->sync_lock_desc.sg, iommu->sync_lock_desc.sglen);
-	memset(&iommu->sync_lock_desc, 0, sizeof(iommu->sync_lock_desc));
-	iommu->sync_lock_vars = NULL;
 
+	if (mmu->priv_bank_table)
+		kgsl_mmu_putpagetable(mmu->priv_bank_table);
+	if (mmu->defaultpagetable)
+		kgsl_mmu_putpagetable(mmu->defaultpagetable);
 	kfree(iommu);
 
 	return 0;
@@ -1767,15 +1293,12 @@
 	pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
 			iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
 
-	/* For v0 SMMU GPU needs to be idle for tlb invalidate as well */
-	if (msm_soc_version_supports_iommu_v0())
-		kgsl_idle(mmu->device);
-
 	/* Acquire GPU-CPU sync Lock here */
 	msm_iommu_lock();
 
 	if (flags & KGSL_MMUFLAGS_PTUPDATE) {
-		if (!msm_soc_version_supports_iommu_v0())
+		if (!msm_soc_version_supports_iommu_v1())
 			kgsl_idle(mmu->device);
 		for (i = 0; i < iommu->unit_count; i++) {
 			/* get the lsb value which should not change when
@@ -1854,9 +1377,6 @@
 	.mmu_get_num_iommu_units = kgsl_iommu_get_num_iommu_units,
 	.mmu_pt_equal = kgsl_iommu_pt_equal,
 	.mmu_get_pt_base_addr = kgsl_iommu_get_pt_base_addr,
-	/* These callbacks will be set on some chipsets */
-	.mmu_setup_pt = NULL,
-	.mmu_cleanup_pt = NULL,
 	.mmu_sync_lock = kgsl_iommu_sync_lock,
 	.mmu_sync_unlock = kgsl_iommu_sync_unlock,
 };
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index c09bc4b..4507700 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -15,37 +15,10 @@
 
 #include <mach/iommu.h>
 
-#define KGSL_IOMMU_CTX_OFFSET_V0	0
-#define KGSL_IOMMU_CTX_OFFSET_V1	0x8000
+#define KGSL_IOMMU_CTX_OFFSET_V1	0
+#define KGSL_IOMMU_CTX_OFFSET_V2	0x8000
 #define KGSL_IOMMU_CTX_SHIFT		12
 
-/* TLBLKCR fields */
-#define KGSL_IOMMU_TLBLKCR_LKE_MASK		0x00000001
-#define KGSL_IOMMU_TLBLKCR_LKE_SHIFT		0
-#define KGSL_IOMMU_TLBLKCR_TLBIALLCFG_MASK	0x00000001
-#define KGSL_IOMMU_TLBLKCR_TLBIALLCFG_SHIFT	1
-#define KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_MASK	0x00000001
-#define KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_SHIFT	2
-#define KGSL_IOMMU_TLBLKCR_TLBIVAACFG_MASK	0x00000001
-#define KGSL_IOMMU_TLBLKCR_TLBIVAACFG_SHIFT	3
-#define KGSL_IOMMU_TLBLKCR_FLOOR_MASK		0x000000FF
-#define KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT		8
-#define KGSL_IOMMU_TLBLKCR_VICTIM_MASK		0x000000FF
-#define KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT		16
-
-/* V2PXX fields */
-#define KGSL_IOMMU_V2PXX_INDEX_MASK		0x000000FF
-#define KGSL_IOMMU_V2PXX_INDEX_SHIFT		0
-#define KGSL_IOMMU_V2PXX_VA_MASK		0x000FFFFF
-#define KGSL_IOMMU_V2PXX_VA_SHIFT		12
-
-/* FSYNR1 V0 fields */
-#define KGSL_IOMMU_FSYNR1_AWRITE_MASK		0x00000001
-#define KGSL_IOMMU_FSYNR1_AWRITE_SHIFT		8
-/* FSYNR0 V1 fields */
-#define KGSL_IOMMU_V1_FSYNR0_WNR_MASK		0x00000001
-#define KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT		4
-
 enum kgsl_iommu_reg_map {
 	KGSL_IOMMU_GLOBAL_BASE = 0,
 	KGSL_IOMMU_CTX_TTBR0,
@@ -53,10 +26,6 @@
 	KGSL_IOMMU_CTX_FSR,
 	KGSL_IOMMU_CTX_TLBIALL,
 	KGSL_IOMMU_CTX_RESUME,
-	KGSL_IOMMU_CTX_TLBLKCR,
-	KGSL_IOMMU_CTX_V2PUR,
-	KGSL_IOMMU_CTX_FSYNR0,
-	KGSL_IOMMU_CTX_FSYNR1,
 	KGSL_IOMMU_REG_MAX
 };
 
@@ -155,8 +124,6 @@
  * IOMMU registers
  * @sync_lock_desc: GPU Memory descriptor for the memory containing the
  * spinlocks
- * @sync_lock_offset - The page offset within a page at which the sync
- * variables are located
  * @sync_lock_initialized: True if the sync_lock feature is enabled
  */
 struct kgsl_iommu {
@@ -169,7 +136,6 @@
 	struct kgsl_iommu_register_list *iommu_reg_list;
 	struct remote_iommu_petersons_spinlock *sync_lock_vars;
 	struct kgsl_memdesc sync_lock_desc;
-	unsigned int sync_lock_offset;
 	bool sync_lock_initialized;
 };
 
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
index a7832e4..83d14f7 100644
--- a/drivers/gpu/msm/kgsl_log.h
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011,2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index 4e95373..fce0d01 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -23,11 +23,13 @@
 
 #include "kgsl.h"
 #include "kgsl_mmu.h"
-#include "kgsl_gpummu.h"
 #include "kgsl_device.h"
 #include "kgsl_sharedmem.h"
 #include "adreno.h"
 
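+/* GPU virtual allocations from the general pool are aligned to 8K (1 << 13) */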
+#define KGSL_MMU_ALIGN_SHIFT    13
+#define KGSL_MMU_ALIGN_MASK     (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
+
 static enum kgsl_mmutype kgsl_mmu_type;
 
 static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable);
@@ -35,18 +37,17 @@
 static int kgsl_cleanup_pt(struct kgsl_pagetable *pt)
 {
 	int i;
-	struct kgsl_device *device;
-
+	/* For IOMMU only unmap the global structures to global pt */
+	if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
+		(KGSL_MMU_GLOBAL_PT != pt->name) &&
+		(KGSL_MMU_PRIV_BANK_TABLE_NAME != pt->name))
+		return 0;
 	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
-		device = kgsl_driver.devp[i];
+		struct kgsl_device *device = kgsl_driver.devp[i];
 		if (device)
 			device->ftbl->cleanup_pt(device, pt);
 	}
-	/* Only the 3d device needs mmu specific pt entries */
-	device = kgsl_driver.devp[KGSL_DEVICE_3D0];
-	if (device->mmu.mmu_ops->mmu_cleanup_pt != NULL)
-		device->mmu.mmu_ops->mmu_cleanup_pt(&device->mmu, pt);
-
 	return 0;
 }
 
@@ -55,25 +56,21 @@
 {
 	int i = 0;
 	int status = 0;
-	struct kgsl_device *device;
 
+	/* For IOMMU only map the global structures to global pt */
+	if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
+		(KGSL_MMU_GLOBAL_PT != pt->name) &&
+		(KGSL_MMU_PRIV_BANK_TABLE_NAME != pt->name))
+		return 0;
 	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
-		device = kgsl_driver.devp[i];
+		struct kgsl_device *device = kgsl_driver.devp[i];
 		if (device) {
 			status = device->ftbl->setup_pt(device, pt);
 			if (status)
 				goto error_pt;
 		}
 	}
-	/* Only the 3d device needs mmu specific pt entries */
-	device = kgsl_driver.devp[KGSL_DEVICE_3D0];
-	if (device->mmu.mmu_ops->mmu_setup_pt != NULL) {
-		status = device->mmu.mmu_ops->mmu_setup_pt(&device->mmu, pt);
-		if (status) {
-			i = KGSL_DEVICE_MAX - 1;
-			goto error_pt;
-		}
-	}
 	return status;
 error_pt:
 	while (i >= 0) {
@@ -104,7 +101,7 @@
 	if (pagetable->pool)
 		gen_pool_destroy(pagetable->pool);
 
-	pagetable->pt_ops->mmu_destroy_pagetable(pagetable);
+	pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv);
 
 	kfree(pagetable);
 }
@@ -194,7 +191,7 @@
 
 	if (pt) {
 		ret += snprintf(buf, PAGE_SIZE, "0x%x\n",
-			kgsl_mmu_get_ptsize(pt->mmu));
+			kgsl_mmu_get_ptsize());
 	}
 
 	kgsl_put_pagetable(pt);
@@ -313,6 +310,22 @@
 	return ret;
 }
 
+unsigned int kgsl_mmu_get_ptsize(void)
+{
+	/*
+	 * For IOMMU, we could do up to 4G virtual range if we wanted to, but
+	 * it makes more sense to return a smaller range and leave the rest of
+	 * the virtual range for future improvements
+	 */
+
+	if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type)
+		return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
+	else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type)
+		return SZ_2G - KGSL_PAGETABLE_BASE;
+	else
+		return 0;
+}
+
 int
 kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, unsigned int pt_base)
 {
@@ -342,7 +355,7 @@
 	unsigned int ret = 0;
 
 	if (!mmu->mmu_ops || !mmu->mmu_ops->mmu_pt_equal)
-		return 0;
+		return KGSL_MMU_GLOBAL_PT;
 	spin_lock(&kgsl_driver.ptlock);
 	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
 		if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
@@ -445,8 +458,7 @@
 }
 EXPORT_SYMBOL(kgsl_mh_intrcallback);
 
-static struct kgsl_pagetable *
-kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu,
+static struct kgsl_pagetable *kgsl_mmu_createpagetableobject(
 				unsigned int name)
 {
 	int status = 0;
@@ -465,8 +477,8 @@
 
 	spin_lock_init(&pagetable->lock);
 
-	ptsize = kgsl_mmu_get_ptsize(mmu);
-	pagetable->mmu = mmu;
+	ptsize = kgsl_mmu_get_ptsize();
+
 	pagetable->name = name;
 	pagetable->max_entries = KGSL_PAGETABLE_ENTRIES(ptsize);
 	pagetable->fault_addr = 0xFFFFFFFF;
@@ -478,10 +490,10 @@
 	if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) &&
 		((KGSL_MMU_GLOBAL_PT == name) ||
 		(KGSL_MMU_PRIV_BANK_TABLE_NAME == name))) {
-		pagetable->kgsl_pool = gen_pool_create(ilog2(SZ_8K), -1);
+		pagetable->kgsl_pool = gen_pool_create(PAGE_SHIFT, -1);
 		if (pagetable->kgsl_pool == NULL) {
 			KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
-					ilog2(SZ_8K));
+					PAGE_SHIFT);
 			goto err_alloc;
 		}
 		if (gen_pool_add(pagetable->kgsl_pool,
@@ -492,14 +504,14 @@
 		}
 	}
 
-	pagetable->pool = gen_pool_create(PAGE_SHIFT, -1);
+	pagetable->pool = gen_pool_create(KGSL_MMU_ALIGN_SHIFT, -1);
 	if (pagetable->pool == NULL) {
 		KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
-			      PAGE_SHIFT);
+			      KGSL_MMU_ALIGN_SHIFT);
 		goto err_kgsl_pool;
 	}
 
-	if (gen_pool_add(pagetable->pool, kgsl_mmu_get_base_addr(mmu),
+	if (gen_pool_add(pagetable->pool, KGSL_PAGETABLE_BASE,
 				ptsize, -1)) {
 		KGSL_CORE_ERR("gen_pool_add failed\n");
 		goto err_pool;
@@ -528,7 +540,7 @@
 	return pagetable;
 
 err_mmu_create:
-	pagetable->pt_ops->mmu_destroy_pagetable(pagetable);
+	pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv);
 err_pool:
 	gen_pool_destroy(pagetable->pool);
 err_kgsl_pool:
@@ -540,21 +552,25 @@
 	return NULL;
 }
 
-struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu,
-						unsigned long name)
+struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name)
 {
 	struct kgsl_pagetable *pt;
 
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
 		return (void *)(-1);
 
-	if (!kgsl_mmu_is_perprocess(mmu))
+#ifndef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
+	name = KGSL_MMU_GLOBAL_PT;
+#endif
+	/* We presently do not support per-process for IOMMU-v2 */
+/*
+	if (!msm_soc_version_supports_iommu_v1())
 		name = KGSL_MMU_GLOBAL_PT;
-
+*/
 	pt = kgsl_get_pagetable(name);
 
 	if (pt == NULL)
-		pt = kgsl_mmu_createpagetableobject(mmu, name);
+		pt = kgsl_mmu_createpagetableobject(name);
 
 	return pt;
 }
@@ -611,15 +627,24 @@
 	 */
 }
 
+static inline struct gen_pool *
+_get_pool(struct kgsl_pagetable *pagetable, unsigned int flags)
+{
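+	/* Global (KGSL_MEMDESC_GLOBAL) buffers come from kgsl_pool when the
+	 * pagetable has one; everything else uses the general pool */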
+	if (pagetable->kgsl_pool &&
+		(KGSL_MEMDESC_GLOBAL & flags))
+		return pagetable->kgsl_pool;
+	return pagetable->pool;
+}
+
 int
 kgsl_mmu_map(struct kgsl_pagetable *pagetable,
-				struct kgsl_memdesc *memdesc)
+				struct kgsl_memdesc *memdesc,
+				unsigned int protflags)
 {
 	int ret;
-	struct gen_pool *pool = NULL;
+	struct gen_pool *pool;
 	int size;
 	int page_align = ilog2(PAGE_SIZE);
-	unsigned int protflags = kgsl_memdesc_protflags(memdesc);
 
 	if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) {
 		if (memdesc->sglen == 1) {
@@ -641,57 +666,33 @@
 
 	size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
 
-	pool = pagetable->pool;
+	/* Allocate from kgsl pool if it exists for global mappings */
+	pool = _get_pool(pagetable, memdesc->priv);
 
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-		/* Allocate aligned virtual addresses for iommu. This allows
-		 * more efficient pagetable entries if the physical memory
-		 * is also aligned. Don't do this for GPUMMU, because
-		 * the address space is so small.
-		 */
-		if (kgsl_memdesc_get_align(memdesc) > 0)
-			page_align = kgsl_memdesc_get_align(memdesc);
-		if (kgsl_memdesc_is_global(memdesc)) {
-			/*
-			 * Only the default pagetable has a kgsl_pool, and
-			 * it is responsible for creating the mapping for
-			 * each global buffer. The mapping will be reused
-			 * in all other pagetables and it must already exist
-			 * when we're creating other pagetables which do not
-			 * have a kgsl_pool.
-			 */
-			pool = pagetable->kgsl_pool;
-			if (pool == NULL && memdesc->gpuaddr == 0) {
-				KGSL_CORE_ERR(
-				  "No address for global mapping into pt %d\n",
-				  pagetable->name);
-				return -EINVAL;
-			}
-		} else if (kgsl_memdesc_use_cpu_map(memdesc)) {
-			if (memdesc->gpuaddr == 0)
-				return -EINVAL;
-			pool = NULL;
-		}
-	}
-	if (pool) {
-		memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size,
-							  page_align);
-		if (memdesc->gpuaddr == 0) {
-			KGSL_CORE_ERR("gen_pool_alloc(%d) failed, pool: %s\n",
-					size,
-					(pool == pagetable->kgsl_pool) ?
-					"kgsl_pool" : "general_pool");
-			KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
-					pagetable->name,
-					pagetable->stats.mapped,
-					pagetable->stats.entries);
-			return -ENOMEM;
-		}
+	/* Allocate aligned virtual addresses for iommu. This allows
+	 * more efficient pagetable entries if the physical memory
+	 * is also aligned. Don't do this for GPUMMU, because
+	 * the address space is so small.
+	 */
+	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype() &&
+	    kgsl_memdesc_get_align(memdesc) > 0)
+		page_align = kgsl_memdesc_get_align(memdesc);
+
+	memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size, page_align);
+	if (memdesc->gpuaddr == 0) {
+		KGSL_CORE_ERR("gen_pool_alloc(%d) failed from pool: %s\n",
+			size,
+			(pool == pagetable->kgsl_pool) ?
+			"kgsl_pool" : "general_pool");
+		KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
+				pagetable->name, pagetable->stats.mapped,
+				pagetable->stats.entries);
+		return -ENOMEM;
 	}
 
 	if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
 		spin_lock(&pagetable->lock);
-	ret = pagetable->pt_ops->mmu_map(pagetable, memdesc, protflags,
+	ret = pagetable->pt_ops->mmu_map(pagetable->priv, memdesc, protflags,
 						&pagetable->tlb_flags);
 	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
 		spin_lock(&pagetable->lock);
@@ -713,8 +714,7 @@
 
 err_free_gpuaddr:
 	spin_unlock(&pagetable->lock);
-	if (pool)
-		gen_pool_free(pool, memdesc->gpuaddr, size);
+	gen_pool_free(pool, memdesc->gpuaddr, size);
 	memdesc->gpuaddr = 0;
 	return ret;
 }
@@ -744,7 +744,7 @@
 
 	if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
 		spin_lock(&pagetable->lock);
-	pagetable->pt_ops->mmu_unmap(pagetable, memdesc,
+	pagetable->pt_ops->mmu_unmap(pagetable->priv, memdesc,
 					&pagetable->tlb_flags);
 
 	/* If buffer is unmapped 0 fault addr */
@@ -760,29 +760,21 @@
 
 	spin_unlock(&pagetable->lock);
 
-	pool = pagetable->pool;
-
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-		if (kgsl_memdesc_is_global(memdesc))
-			pool = pagetable->kgsl_pool;
-		else if (kgsl_memdesc_use_cpu_map(memdesc))
-			pool = NULL;
-	}
-	if (pool)
-		gen_pool_free(pool, memdesc->gpuaddr, size);
+	pool = _get_pool(pagetable, memdesc->priv);
+	gen_pool_free(pool, memdesc->gpuaddr, size);
 
 	/*
 	 * Don't clear the gpuaddr on global mappings because they
 	 * may be in use by other pagetables
 	 */
-	if (!kgsl_memdesc_is_global(memdesc))
+	if (!(memdesc->priv & KGSL_MEMDESC_GLOBAL))
 		memdesc->gpuaddr = 0;
 	return 0;
 }
 EXPORT_SYMBOL(kgsl_mmu_unmap);
 
 int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable,
-			struct kgsl_memdesc *memdesc)
+			struct kgsl_memdesc *memdesc, unsigned int protflags)
 {
 	int result = -EINVAL;
 	unsigned int gpuaddr = 0;
@@ -794,17 +786,19 @@
 	/* Not all global mappings are needed for all MMU types */
 	if (!memdesc->size)
 		return 0;
+
 	gpuaddr = memdesc->gpuaddr;
 	memdesc->priv |= KGSL_MEMDESC_GLOBAL;
 
-	result = kgsl_mmu_map(pagetable, memdesc);
+	result = kgsl_mmu_map(pagetable, memdesc, protflags);
 	if (result)
 		goto error;
 
 	/*global mappings must have the same gpu address in all pagetables*/
 	if (gpuaddr && gpuaddr != memdesc->gpuaddr) {
-		KGSL_CORE_ERR("pt %p addr mismatch phys %pa gpu 0x%0x 0x%08x",
-		     pagetable, &memdesc->physaddr, gpuaddr, memdesc->gpuaddr);
+		KGSL_CORE_ERR("pt %p addr mismatch phys 0x%08x "
+			"gpu 0x%0x 0x%08x", pagetable, memdesc->physaddr,
+			gpuaddr, memdesc->gpuaddr);
 		goto error_unmap;
 	}
 	return result;
@@ -897,18 +891,12 @@
 }
 EXPORT_SYMBOL(kgsl_mmu_set_mmutype);
 
-int kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, unsigned int gpuaddr)
+int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr)
 {
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
 		return 1;
-	if (gpuaddr >= kgsl_mmu_get_base_addr(pt->mmu) &&
-		gpuaddr < kgsl_mmu_get_base_addr(pt->mmu) +
-		kgsl_mmu_get_ptsize(pt->mmu))
-		return 1;
-	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU
-		&& kgsl_mmu_is_perprocess(pt->mmu))
-		return (gpuaddr > 0 && gpuaddr < TASK_SIZE);
-	return 0;
+	return ((gpuaddr >= KGSL_PAGETABLE_BASE) &&
+		(gpuaddr < (KGSL_PAGETABLE_BASE + kgsl_mmu_get_ptsize())));
 }
 EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range);
 
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index d7d9516..377f342 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -13,23 +13,16 @@
 #ifndef __KGSL_MMU_H
 #define __KGSL_MMU_H
 
-#include <mach/iommu.h>
-
 /*
- * These defines control the address range for allocations that
- * are mapped into all pagetables.
+ * These defines control the split between ttbr1 and ttbr0 pagetables of IOMMU
+ * and what ranges of memory we map to them
  */
-#define KGSL_IOMMU_GLOBAL_MEM_BASE	0xf8000000
+#define KGSL_IOMMU_GLOBAL_MEM_BASE	0xC0000000
 #define KGSL_IOMMU_GLOBAL_MEM_SIZE	SZ_4M
+#define KGSL_IOMMU_TTBR1_SPLIT		2
 
-#define KGSL_MMU_ALIGN_MASK     (~((1 << PAGE_SHIFT) - 1))
-
-/* defconfig option for disabling per process pagetables */
-#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
-#define KGSL_MMU_USE_PER_PROCESS_PT true
-#else
-#define KGSL_MMU_USE_PER_PROCESS_PT false
-#endif
+#define KGSL_MMU_ALIGN_SHIFT    13
+#define KGSL_MMU_ALIGN_MASK     (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
 
 /* Identifier for the global page table */
 /* Per process page tables will probably pass in the thread group
@@ -123,7 +116,6 @@
 	unsigned int tlb_flags;
 	unsigned int fault_addr;
 	void *priv;
-	struct kgsl_mmu *mmu;
 };
 
 struct kgsl_mmu;
@@ -157,26 +149,24 @@
 	unsigned int (*mmu_get_pt_base_addr)
 			(struct kgsl_mmu *mmu,
 			struct kgsl_pagetable *pt);
-	int (*mmu_setup_pt) (struct kgsl_mmu *mmu,
-			struct kgsl_pagetable *pt);
-	void (*mmu_cleanup_pt) (struct kgsl_mmu *mmu,
-			struct kgsl_pagetable *pt);
 	unsigned int (*mmu_sync_lock)
-			(struct kgsl_mmu *mmu, unsigned int *cmds);
+			(struct kgsl_mmu *mmu,
+			unsigned int *cmds);
 	unsigned int (*mmu_sync_unlock)
-			(struct kgsl_mmu *mmu, unsigned int *cmds);
+			(struct kgsl_mmu *mmu,
+			unsigned int *cmds);
 };
 
 struct kgsl_mmu_pt_ops {
-	int (*mmu_map) (struct kgsl_pagetable *pt,
+	int (*mmu_map) (void *mmu_pt,
 			struct kgsl_memdesc *memdesc,
 			unsigned int protflags,
 			unsigned int *tlb_flags);
-	int (*mmu_unmap) (struct kgsl_pagetable *pt,
+	int (*mmu_unmap) (void *mmu_pt,
 			struct kgsl_memdesc *memdesc,
 			unsigned int *tlb_flags);
 	void *(*mmu_create_pagetable) (void);
-	void (*mmu_destroy_pagetable) (struct kgsl_pagetable *);
+	void (*mmu_destroy_pagetable) (void *pt);
 };
 
 #define KGSL_MMU_FLAGS_IOMMU_SYNC BIT(31)
@@ -195,19 +185,14 @@
 	const struct kgsl_mmu_ops *mmu_ops;
 	void *priv;
 	int fault;
-	unsigned long pt_base;
-	unsigned long pt_size;
-	bool pt_per_process;
-	bool use_cpu_map;
 };
 
+#include "kgsl_gpummu.h"
+
 extern struct kgsl_mmu_ops iommu_ops;
 extern struct kgsl_mmu_pt_ops iommu_pt_ops;
-extern struct kgsl_mmu_ops gpummu_ops;
-extern struct kgsl_mmu_pt_ops gpummu_pt_ops;
 
-struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *,
-						unsigned long name);
+struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name);
 void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable);
 void kgsl_mh_start(struct kgsl_device *device);
 void kgsl_mh_intrcallback(struct kgsl_device *device);
@@ -215,9 +200,10 @@
 int kgsl_mmu_start(struct kgsl_device *device);
 int kgsl_mmu_close(struct kgsl_device *device);
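+/* protflags carries GSL_PT_PAGE_RV/GSL_PT_PAGE_WV access bits for the map */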
 int kgsl_mmu_map(struct kgsl_pagetable *pagetable,
-		 struct kgsl_memdesc *memdesc);
+		 struct kgsl_memdesc *memdesc,
+		 unsigned int protflags);
 int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable,
-			struct kgsl_memdesc *memdesc);
+			struct kgsl_memdesc *memdesc, unsigned int protflags);
 int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable,
 		    struct kgsl_memdesc *memdesc);
 unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
@@ -234,7 +220,8 @@
 int kgsl_mmu_enabled(void);
 void kgsl_mmu_set_mmutype(char *mmutype);
 enum kgsl_mmutype kgsl_mmu_get_mmutype(void);
-int kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, unsigned int gpuaddr);
+unsigned int kgsl_mmu_get_ptsize(void);
+int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr);
 
 /*
  * Static inline functions of MMU that simply call the SMMU specific
@@ -345,56 +332,6 @@
 		return 0;
 }
 
-/*
- * kgsl_mmu_is_perprocess() - Runtime check for per-process
- * pagetables.
- * @mmu: the mmu
- *
- * Returns true if per-process pagetables are enabled,
- * false if not.
- */
-static inline int kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu)
-{
-	return mmu->pt_per_process;
-}
-
-/*
- * kgsl_mmu_use_cpu_map() - Runtime check for matching the CPU
- * address space on the GPU.
- * @mmu: the mmu
- *
- * Returns true if supported false if not.
- */
-static inline int kgsl_mmu_use_cpu_map(struct kgsl_mmu *mmu)
-{
-	return mmu->pt_per_process;
-}
-
-/*
- * kgsl_mmu_base_addr() - Get gpu virtual address base.
- * @mmu: the mmu
- *
- * Returns the start address of the allocatable gpu
- * virtual address space. Other mappings that mirror
- * the CPU address space are possible outside this range.
- */
-static inline unsigned int kgsl_mmu_get_base_addr(struct kgsl_mmu *mmu)
-{
-	return mmu->pt_base;
-}
-
-/*
- * kgsl_mmu_get_ptsize() - Get gpu pagetable size
- * @mmu: the mmu
- *
- * Returns the usable size of the gpu allocatable
- * address space.
- */
-static inline unsigned int kgsl_mmu_get_ptsize(struct kgsl_mmu *mmu)
-{
-	return mmu->pt_size;
-}
-
 static inline int kgsl_mmu_sync_lock(struct kgsl_mmu *mmu,
 				unsigned int *cmds)
 {
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 2f8d93e..d489119 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -23,13 +23,13 @@
 #include "kgsl_pwrscale.h"
 #include "kgsl_device.h"
 #include "kgsl_trace.h"
-#include "kgsl_sharedmem.h"
 
 #define KGSL_PWRFLAGS_POWER_ON 0
 #define KGSL_PWRFLAGS_CLK_ON   1
 #define KGSL_PWRFLAGS_AXI_ON   2
 #define KGSL_PWRFLAGS_IRQ_ON   3
 
+#define GPU_SWFI_LATENCY	3
 #define UPDATE_BUSY_VAL		1000000
 #define UPDATE_BUSY		50
 
@@ -130,16 +130,6 @@
 	 */
 
 	pwr->active_pwrlevel = new_level;
-	pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
-
-	if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
-
-		if (pwr->pcl)
-			msm_bus_scale_client_update_request(pwr->pcl,
-				pwrlevel->bus_freq);
-		else if (pwr->ebi1_clk)
-			clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
-	}
 
 	if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) ||
 		(device->state == KGSL_STATE_NAP)) {
@@ -166,6 +156,16 @@
 		}
 	}
 
+	pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
+
+	if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
+
+		if (pwr->pcl)
+			msm_bus_scale_client_update_request(pwr->pcl,
+				pwrlevel->bus_freq);
+		else if (pwr->ebi1_clk)
+			clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
+	}
 
 	trace_kgsl_pwrlevel(device, pwr->active_pwrlevel, pwrlevel->gpu_freq);
 }
@@ -357,13 +357,13 @@
 	return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1);
 }
 
-/* Given a GPU clock value, return the lowest matching powerlevel */
+/* Given a GPU clock value, return the first power level within 5 MHz */
 
 static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock)
 {
 	int i;
 
-	for (i = pwr->num_pwrlevels - 1; i >= 0; i--) {
+	for (i = 0; i < pwr->num_pwrlevels - 1; i++) {
 		if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000)
 			return i;
 	}
@@ -515,6 +515,7 @@
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
 	struct kgsl_pwrctrl *pwr;
 	const long div = 1000/HZ;
+	static unsigned int org_interval_timeout = 1;
 	int rc;
 
 	if (device == NULL)
@@ -527,11 +528,15 @@
 	if (rc)
 		return rc;
 
+	if (org_interval_timeout == 1)
+		org_interval_timeout = pwr->interval_timeout;
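+	/* The first write captures the driver's original timeout;
+	 * smaller values are ignored below */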
+
 	mutex_lock(&device->mutex);
 
 	/* Let the timeout be requested in ms, but convert to jiffies. */
 	val /= div;
-	pwr->interval_timeout = val;
+	if (val >= org_interval_timeout)
+		pwr->interval_timeout = val;
 
 	mutex_unlock(&device->mutex);
 
@@ -543,48 +548,10 @@
 					char *buf)
 {
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	int mul = 1000/HZ;
-	if (device == NULL)
-		return 0;
-	/* Show the idle_timeout converted to msec */
-	return snprintf(buf, PAGE_SIZE, "%d\n",
-		device->pwrctrl.interval_timeout * mul);
-}
-
-static int kgsl_pwrctrl_pmqos_latency_store(struct device *dev,
-					struct device_attribute *attr,
-					const char *buf, size_t count)
-{
-	char temp[20];
-	unsigned long val;
-	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	int rc;
-
-	if (device == NULL)
-		return 0;
-
-	snprintf(temp, sizeof(temp), "%.*s",
-			(int)min(count, sizeof(temp) - 1), buf);
-	rc = kstrtoul(temp, 0, &val);
-	if (rc)
-		return rc;
-
-	mutex_lock(&device->mutex);
-	device->pwrctrl.pm_qos_latency = val;
-	mutex_unlock(&device->mutex);
-
-	return count;
-}
-
-static int kgsl_pwrctrl_pmqos_latency_show(struct device *dev,
-					   struct device_attribute *attr,
-					   char *buf)
-{
-	struct kgsl_device *device = kgsl_device_from_dev(dev);
 	if (device == NULL)
 		return 0;
 	return snprintf(buf, PAGE_SIZE, "%d\n",
-		device->pwrctrl.pm_qos_latency);
+		device->pwrctrl.interval_timeout);
 }
 
 static int kgsl_pwrctrl_gpubusy_show(struct device *dev,
@@ -648,14 +615,6 @@
 	return num_chars;
 }
 
-static int kgsl_pwrctrl_reset_count_show(struct device *dev,
-					struct device_attribute *attr,
-					char *buf)
-{
-	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter);
-}
-
 DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, kgsl_pwrctrl_gpuclk_store);
 DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show,
 	kgsl_pwrctrl_max_gpuclk_store);
@@ -681,12 +640,6 @@
 DEVICE_ATTR(num_pwrlevels, 0444,
 	kgsl_pwrctrl_num_pwrlevels_show,
 	NULL);
-DEVICE_ATTR(pmqos_latency, 0644,
-	kgsl_pwrctrl_pmqos_latency_show,
-	kgsl_pwrctrl_pmqos_latency_store);
-DEVICE_ATTR(reset_count, 0444,
-	kgsl_pwrctrl_reset_count_show,
-	NULL);
 
 static const struct device_attribute *pwrctrl_attr_list[] = {
 	&dev_attr_gpuclk,
@@ -700,8 +653,6 @@
 	&dev_attr_min_pwrlevel,
 	&dev_attr_thermal_pwrlevel,
 	&dev_attr_num_pwrlevels,
-	&dev_attr_pmqos_latency,
-	&dev_attr_reset_count,
 	NULL
 };
 
@@ -733,9 +684,6 @@
 	clkstats->on_time_old = on_time;
 	clkstats->elapsed_old = clkstats->elapsed;
 	clkstats->elapsed = 0;
-
-	trace_kgsl_gpubusy(device, clkstats->on_time_old,
-		clkstats->elapsed_old);
 }
 
 /* Track the amount of time the gpu is on vs the total system time. *
@@ -766,23 +714,23 @@
 			/* High latency clock maintenance. */
 			if ((pwr->pwrlevels[0].gpu_freq > 0) &&
 				(requested_state != KGSL_STATE_NAP)) {
-				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
-					if (pwr->grp_clks[i])
-						clk_unprepare(pwr->grp_clks[i]);
 				clk_set_rate(pwr->grp_clks[0],
 					pwr->pwrlevels[pwr->num_pwrlevels - 1].
 					gpu_freq);
+				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+					if (pwr->grp_clks[i])
+						clk_unprepare(pwr->grp_clks[i]);
 			}
 			kgsl_pwrctrl_busy_time(device, true);
 		} else if (requested_state == KGSL_STATE_SLEEP) {
 			/* High latency clock maintenance. */
-			for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
-				if (pwr->grp_clks[i])
-					clk_unprepare(pwr->grp_clks[i]);
 			if ((pwr->pwrlevels[0].gpu_freq > 0))
 				clk_set_rate(pwr->grp_clks[0],
 					pwr->pwrlevels[pwr->num_pwrlevels - 1].
 					gpu_freq);
+			for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+				if (pwr->grp_clks[i])
+					clk_unprepare(pwr->grp_clks[i]);
 		}
 	} else if (state == KGSL_PWRFLAGS_ON) {
 		if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON,
@@ -790,14 +738,15 @@
 			trace_kgsl_clk(device, state);
 			/* High latency clock maintenance. */
 			if (device->state != KGSL_STATE_NAP) {
+				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+					if (pwr->grp_clks[i])
+						clk_prepare(pwr->grp_clks[i]);
+
 				if (pwr->pwrlevels[0].gpu_freq > 0)
 					clk_set_rate(pwr->grp_clks[0],
 						pwr->pwrlevels
 						[pwr->active_pwrlevel].
 						gpu_freq);
-				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
-					if (pwr->grp_clks[i])
-						clk_prepare(pwr->grp_clks[i]);
 			}
 			/* as last step, enable grp_clk
 			   this is to let GPU interrupt to come */
@@ -929,8 +878,7 @@
 	if (pdata->set_grp_async != NULL)
 		pdata->set_grp_async();
 
-	if (pdata->num_levels > KGSL_MAX_PWRLEVELS ||
-	    pdata->num_levels < 1) {
+	if (pdata->num_levels > KGSL_MAX_PWRLEVELS) {
 		KGSL_PWR_ERR(device, "invalid power level count: %d\n",
 					 pdata->num_levels);
 		result = -EINVAL;
@@ -999,11 +947,6 @@
 		}
 	}
 
-	/* Set the power level step multiplier with 1 as the default */
-	pwr->step_mul = pdata->step_mul ? pdata->step_mul : 1;
-
-	/* Set the CPU latency to 501usec to allow low latency PC modes */
-	pwr->pm_qos_latency = 501;
 
 	pm_runtime_enable(device->parentdev);
 	register_early_suspend(&device->display_off);
@@ -1200,7 +1143,7 @@
 		_sleep_accounting(device);
 		kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP);
-		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+		pm_qos_update_request(&device->pm_qos_req_dma,
 					PM_QOS_DEFAULT_VALUE);
 		break;
 	case KGSL_STATE_SLEEP:
@@ -1234,7 +1177,7 @@
 		device->ftbl->stop(device);
 		_sleep_accounting(device);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
-		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+		pm_qos_update_request(&device->pm_qos_req_dma,
 						PM_QOS_DEFAULT_VALUE);
 		break;
 	case KGSL_STATE_SLUMBER:
@@ -1281,15 +1224,10 @@
 void kgsl_pwrctrl_wake(struct kgsl_device *device)
 {
 	int status;
-	unsigned int context_id;
-	unsigned int state = device->state;
-	unsigned int ts_processed = 0xdeaddead;
-	struct kgsl_context *context;
-
 	kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
 	switch (device->state) {
 	case KGSL_STATE_SLUMBER:
-		status = device->ftbl->start(device);
+		status = device->ftbl->start(device, 0);
 		if (status) {
 			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 			KGSL_DRV_ERR(device, "start failed %d\n", status);
@@ -1299,17 +1237,6 @@
 	case KGSL_STATE_SLEEP:
 		kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
 		kgsl_pwrscale_wake(device);
-		kgsl_sharedmem_readl(&device->memstore,
-			(unsigned int *) &context_id,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-				current_context));
-		context = idr_find(&device->context_idr, context_id);
-		if (context)
-			ts_processed = kgsl_readtimestamp(device, context,
-				KGSL_TIMESTAMP_RETIRED);
-		KGSL_PWR_INFO(device, "Wake from %s state. CTXT: %d RTRD TS: %08X\n",
-			kgsl_pwrstate_to_str(state),
-			context ? context->id : -1, ts_processed);
 		/* fall through */
 	case KGSL_STATE_NAP:
 		/* Turn on the core clocks */
@@ -1320,8 +1247,8 @@
 		/* Re-enable HW access */
 		mod_timer(&device->idle_timer,
 				jiffies + device->pwrctrl.interval_timeout);
-		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
-				device->pwrctrl.pm_qos_latency);
+		pm_qos_update_request(&device->pm_qos_req_dma,
+					GPU_SWFI_LATENCY);
 	case KGSL_STATE_ACTIVE:
 		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 		break;
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
index ced52e1..8d66505 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -60,9 +60,6 @@
  * @irq_name - resource name for the IRQ
  * @restore_slumber - Flag to indicate that we are in a suspend/restore sequence
  * @clk_stats - structure of clock statistics
- * @pm_qos_req_dma - the power management quality of service structure
- * @pm_qos_latency - allowed CPU latency in microseconds
- * @step_mul - multiplier for moving between power levels
  */
 
 struct kgsl_pwrctrl {
@@ -88,9 +85,6 @@
 	s64 time;
 	unsigned int restore_slumber;
 	struct kgsl_clk_stats clk_stats;
-	struct pm_qos_request pm_qos_req_dma;
-	unsigned int pm_qos_latency;
-	unsigned int step_mul;
 };
 
 void kgsl_pwrctrl_irq(struct kgsl_device *device, int state);
diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
index 9b2ac70..a647361 100644
--- a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
+++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -179,12 +179,6 @@
 	}
 	priv->bin.total_time = 0;
 	priv->bin.busy_time = 0;
-
-	/* If the decision is to move to a lower level, make sure the GPU
-	 * frequency drops.
-	 */
-	if (val > 0)
-		val *= pwr->step_mul;
 	if (val)
 		kgsl_pwrctrl_pwrlevel_change(device,
 					     pwr->active_pwrlevel + val);
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index 595f78f..099f4cc 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -154,7 +154,9 @@
 
 static struct mem_entry_stats mem_stats[] = {
 	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel),
+#ifdef CONFIG_ANDROID_PMEM
 	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem),
+#endif
 #ifdef CONFIG_ASHMEM
 	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem),
 #endif
@@ -375,8 +377,11 @@
 		kgsl_driver.stats.vmalloc -= memdesc->size;
 	}
 	if (memdesc->sg)
-		for_each_sg(memdesc->sg, sg, sglen, i)
+		for_each_sg(memdesc->sg, sg, sglen, i) {
+			if (sg->length == 0)
+				break;
 			__free_pages(sg_page(sg), get_order(sg->length));
+		}
 }
 
 static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc)
@@ -509,29 +514,21 @@
 
 void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op)
 {
-	/*
-	 * If the buffer is mapped in the kernel operate on that address
-	 * otherwise use the user address
-	 */
-
-	void *addr = (memdesc->hostptr) ?
-		memdesc->hostptr : (void *) memdesc->useraddr;
-
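+	/* Cache maintenance is done on the buffer's kernel mapping */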
+	void *addr = memdesc->hostptr;
 	int size = memdesc->size;
 
-	if (addr !=  NULL) {
-		switch (op) {
-		case KGSL_CACHE_OP_FLUSH:
-			dmac_flush_range(addr, addr + size);
-			break;
-		case KGSL_CACHE_OP_CLEAN:
-			dmac_clean_range(addr, addr + size);
-			break;
-		case KGSL_CACHE_OP_INV:
-			dmac_inv_range(addr, addr + size);
-			break;
-		}
+	switch (op) {
+	case KGSL_CACHE_OP_FLUSH:
+		dmac_flush_range(addr, addr + size);
+		break;
+	case KGSL_CACHE_OP_CLEAN:
+		dmac_clean_range(addr, addr + size);
+		break;
+	case KGSL_CACHE_OP_INV:
+		dmac_inv_range(addr, addr + size);
+		break;
 	}
+
 	outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, op);
 }
 EXPORT_SYMBOL(kgsl_cache_range_op);
@@ -539,7 +536,7 @@
 static int
 _kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
 			struct kgsl_pagetable *pagetable,
-			size_t size)
+			size_t size, unsigned int protflags)
 {
 	int pcount = 0, order, ret = 0;
 	int j, len, page_size, sglen_alloc, sglen = 0;
@@ -547,15 +544,13 @@
 	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
 	void *ptr;
 	unsigned int align;
-	int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
 
 	align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
 
 	page_size = (align >= ilog2(SZ_64K) && size >= SZ_64K)
 			? SZ_64K : PAGE_SIZE;
 	/* update align flags for what we actually use */
-	if (page_size != PAGE_SIZE)
-		kgsl_memdesc_set_align(memdesc, ilog2(page_size));
+	kgsl_memdesc_set_align(memdesc, ilog2(page_size));
 
 	/*
 	 * There needs to be enough room in the sg structure to be able to
@@ -608,7 +603,7 @@
 	while (len > 0) {
 		struct page *page;
 		unsigned int gfp_mask = GFP_KERNEL | __GFP_HIGHMEM |
-			__GFP_NOWARN | __GFP_NORETRY;
+			__GFP_NOWARN;
 		int j;
 
 		/* don't waste space at the end of the allocation*/
@@ -672,42 +667,41 @@
 	 * zeroed and unmaped each individual page, and then we had to turn
 	 * around and call flush_dcache_page() on that page to clear the caches.
 	 * This was killing us for performance. Instead, we found it is much
-	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
-	 * range ('step' pages), memset it, flush it and then unmap
-	 * - this results in a factor of 4 improvement for speed for large
-	 * buffers. There is a small decrease in speed for small buffers,
-	 * but only on the order of a few microseconds at best. The 'step'
-	 * size is based on a guess at the amount of free vmalloc space, but
-	 * will scale down if there's not enough free space.
+	 * faster to allocate the pages without GFP_ZERO, map the entire range,
+	 * memset it, flush the range and then unmap - this results in a factor
+	 * of 4 improvement for speed for large buffers.  There is a small
+	 * increase in speed for small buffers, but only on the order of a few
+	 * microseconds at best.  The only downside is that there needs to be
+	 * enough temporary space in vmalloc to accommodate the map. This
+	 * shouldn't be a problem, but if it happens, fall back to a much slower
+	 * path
 	 */
-	for (j = 0; j < pcount; j += step) {
-		step = min(step, pcount - j);
 
-		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
+	ptr = vmap(pages, pcount, VM_IOREMAP, page_prot);
 
-		if (ptr != NULL) {
-			memset(ptr, 0, step * PAGE_SIZE);
-			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
-			vunmap(ptr);
-		} else {
-			int k;
-			/* Very, very, very slow path */
+	if (ptr != NULL) {
+		memset(ptr, 0, memdesc->size);
+		dmac_flush_range(ptr, ptr + memdesc->size);
+		vunmap(ptr);
+	} else {
+		/* Very, very, very slow path */
 
-			for (k = j; k < j + step; k++) {
-				ptr = kmap_atomic(pages[k]);
-				memset(ptr, 0, PAGE_SIZE);
-				dmac_flush_range(ptr, ptr + PAGE_SIZE);
-				kunmap_atomic(ptr);
-			}
-			/* scale down the step size to avoid this path */
-			if (step > 1)
-				step >>= 1;
+		for (j = 0; j < pcount; j++) {
+			ptr = kmap_atomic(pages[j]);
+			memset(ptr, 0, PAGE_SIZE);
+			dmac_flush_range(ptr, ptr + PAGE_SIZE);
+			kunmap_atomic(ptr);
 		}
 	}
 
 	outer_cache_range_op_sg(memdesc->sg, memdesc->sglen,
 				KGSL_CACHE_OP_FLUSH);
 
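+	/* Pages are allocated, zeroed and flushed; map them into the GPU now */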
+	ret = kgsl_mmu_map(pagetable, memdesc, protflags);
+
+	if (ret)
+		goto done;
+
 	KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc,
 		kgsl_driver.stats.page_alloc_max);
 
@@ -734,7 +728,8 @@
 
 	size = ALIGN(size, PAGE_SIZE * 2);
 
-	ret =  _kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
+	ret =  _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
+		GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 	if (!ret)
 		ret = kgsl_page_alloc_map_kernel(memdesc);
 	if (ret)
@@ -748,7 +743,17 @@
 			    struct kgsl_pagetable *pagetable,
 			    size_t size)
 {
-	return _kgsl_sharedmem_page_alloc(memdesc, pagetable, PAGE_ALIGN(size));
+	unsigned int protflags;
+
+	if (size == 0)
+		return -EINVAL;
+
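+	/* Always GPU-readable; writable unless marked GPU read-only */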
+	protflags = GSL_PT_PAGE_RV;
+	if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
+		protflags |= GSL_PT_PAGE_WV;
+
+	return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
+		protflags);
 }
 EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user);
 
@@ -826,6 +831,12 @@
 	if (result)
 		goto err;
 
+	result = kgsl_mmu_map(pagetable, memdesc,
+		GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+
+	if (result)
+		goto err;
+
 	KGSL_STATS_ADD(size, kgsl_driver.stats.coherent,
 		kgsl_driver.stats.coherent_max);
 
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
index 279490f..a895a75 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.h
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -19,7 +19,6 @@
 #include "kgsl_mmu.h"
 #include <linux/slab.h>
 #include <linux/kmemleak.h>
-#include <linux/iommu.h>
 
 #include "kgsl_log.h"
 
@@ -84,18 +83,6 @@
 }
 
 /*
- * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc
- * @memdesc: the memdesc
- *
- * Returns a KGSL_CACHEMODE* value.
- */
-static inline int
-kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc)
-{
-	return (memdesc->flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT;
-}
-
-/*
  * kgsl_memdesc_set_align - Set alignment flags of a memdesc
  * @memdesc - the memdesc
  * @align - alignment requested, as a power of 2 exponent.
@@ -140,8 +127,13 @@
 {
 	if ((sglen * sizeof(struct scatterlist)) <  PAGE_SIZE)
 		return kzalloc(sglen * sizeof(struct scatterlist), GFP_KERNEL);
-	else
-		return vmalloc(sglen * sizeof(struct scatterlist));
+	else {
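+		/* vmalloc(), unlike kzalloc(), does not zero the allocation */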
+		void *ptr = vmalloc(sglen * sizeof(struct scatterlist));
+		if (ptr)
+			memset(ptr, 0, sglen * sizeof(struct scatterlist));
+
+		return ptr;
+	}
 }
 
 static inline void kgsl_sg_free(void *ptr, unsigned int sglen)
@@ -157,7 +149,7 @@
 		unsigned int physaddr, unsigned int size)
 {
 	memdesc->sg = kgsl_sg_alloc(1);
-	if (memdesc->sg == NULL)
+	if (!memdesc->sg)
 		return -ENOMEM;
 
 	kmemleak_not_leak(memdesc->sg);
@@ -170,98 +162,14 @@
 	return 0;
 }
 
-/*
- * kgsl_memdesc_is_global - is this a globally mapped buffer?
- * @memdesc: the memdesc
- *
- * Returns nonzero if this is a global mapping, 0 otherwise
- */
-static inline int kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc)
-{
-	return (memdesc->priv & KGSL_MEMDESC_GLOBAL) != 0;
-}
-
-/*
- * kgsl_memdesc_has_guard_page - is the last page a guard page?
- * @memdesc - the memdesc
- *
- * Returns nonzero if there is a guard page, 0 otherwise
- */
-static inline int
-kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc)
-{
-	return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0;
-}
-
-/*
- * kgsl_memdesc_protflags - get mmu protection flags
- * @memdesc - the memdesc
- * Returns a mask of GSL_PT_PAGE* or IOMMU* values based
- * on the memdesc flags.
- */
-static inline unsigned int
-kgsl_memdesc_protflags(const struct kgsl_memdesc *memdesc)
-{
-	unsigned int protflags = 0;
-	enum kgsl_mmutype mmutype = kgsl_mmu_get_mmutype();
-
-	if (mmutype == KGSL_MMU_TYPE_GPU) {
-		protflags = GSL_PT_PAGE_RV;
-		if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
-			protflags |= GSL_PT_PAGE_WV;
-	} else if (mmutype == KGSL_MMU_TYPE_IOMMU) {
-		protflags = IOMMU_READ;
-		if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
-			protflags |= IOMMU_WRITE;
-	}
-	return protflags;
-}
-
-/*
- * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU?
- * @memdesc - the memdesc
- */
-static inline int
-kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
-{
-	return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0;
-}
-
-/*
- * kgsl_memdesc_mmapsize - get the size of the mmap region
- * @memdesc - the memdesc
- *
- * The entire memdesc must be mapped. Additionally if the
- * CPU mapping is going to be mirrored, there must be room
- * for the guard page to be mapped so that the address spaces
- * match up.
- */
-static inline unsigned int
-kgsl_memdesc_mmapsize(const struct kgsl_memdesc *memdesc)
-{
-	unsigned int size = memdesc->size;
-	if (kgsl_memdesc_use_cpu_map(memdesc) &&
-		kgsl_memdesc_has_guard_page(memdesc))
-		size += SZ_4K;
-	return size;
-}
-
 static inline int
 kgsl_allocate(struct kgsl_memdesc *memdesc,
 		struct kgsl_pagetable *pagetable, size_t size)
 {
-	int ret;
-	memdesc->priv |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
 	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
 		return kgsl_sharedmem_ebimem(memdesc, pagetable, size);
-
-	ret = kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
-	if (ret)
-		return ret;
-	ret = kgsl_mmu_map(pagetable, memdesc);
-	if (ret)
-		kgsl_sharedmem_free(memdesc);
-	return ret;
+	memdesc->flags |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
+	return kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
 }
 
 static inline int
@@ -271,9 +179,6 @@
 {
 	int ret;
 
-	if (size == 0)
-		return -EINVAL;
-
 	memdesc->flags = flags;
 
 	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index 4c9c744..a5aa42f 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -106,12 +106,7 @@
 {
 	struct kgsl_snapshot_linux_context *header = _ctxtptr;
 	struct kgsl_context *context = ptr;
-	struct kgsl_device *device;
-
-	if (context)
-		device = context->dev_priv->device;
-	else
-		device = (struct kgsl_device *)data;
+	struct kgsl_device *device = context->dev_priv->device;
 
 	header->id = id;
 
@@ -146,9 +141,6 @@
 
 	idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
 
-	/* Increment ctxcount for the global memstore */
-	ctxtcount++;
-
 	size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context);
 
 	/* Make sure there is enough room for the data */
@@ -177,9 +169,8 @@
 	header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
 	header->busclk = kgsl_get_clkrate(pwr->ebi1_clk);
 
-	/* Save the last active context */
-	kgsl_sharedmem_readl(&device->memstore, &header->current_context,
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
+	/* Future proof for per-context timestamps */
+	header->current_context = -1;
 
 	/* Get the current PT base */
 	header->ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
@@ -194,10 +185,8 @@
 
 	header->ctxtcount = ctxtcount;
 
-	_ctxtptr = snapshot + sizeof(*header);
-	/* append information for the global context */
-	snapshot_context_info(KGSL_MEMSTORE_GLOBAL, NULL, device);
 	/* append information for each context */
+	_ctxtptr = snapshot + sizeof(*header);
 	idr_for_each(&device->context_idr, snapshot_context_info, NULL);
 
 	/* Return the size of the data segment */
@@ -294,7 +283,7 @@
 {
 	list_del(&obj->node);
 
-	obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN;
+	obj->entry->flags &= ~KGSL_MEM_ENTRY_FROZEN;
 	kgsl_mem_entry_put(obj->entry);
 
 	kfree(obj);
@@ -386,8 +375,8 @@
 	/* If the buffer is already on the list, skip it */
 	list_for_each_entry(obj, &device->snapshot_obj_list, node) {
 		if (obj->gpuaddr == gpuaddr && obj->ptbase == ptbase) {
-			/* If the size is different, use the bigger size */
-			if (obj->size < size)
+			/* If the size is different, use the new size */
+			if (obj->size != size)
 				obj->size = size;
 
 			return 0;
@@ -427,10 +416,10 @@
 	 * 0 so it doesn't get counted twice
 	 */
 
-	if (entry->memdesc.priv & KGSL_MEMDESC_FROZEN)
+	if (entry->flags & KGSL_MEM_ENTRY_FROZEN)
 		return 0;
 
-	entry->memdesc.priv |= KGSL_MEMDESC_FROZEN;
+	entry->flags |= KGSL_MEM_ENTRY_FROZEN;
 
 	return entry->memdesc.size;
 }
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
index 4db2815..327d18a 100644
--- a/drivers/gpu/msm/kgsl_snapshot.h
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -52,7 +52,6 @@
 #define KGSL_SNAPSHOT_SECTION_DEBUG        0x0901
 #define KGSL_SNAPSHOT_SECTION_DEBUGBUS     0x0A01
 #define KGSL_SNAPSHOT_SECTION_GPU_OBJECT   0x0B01
-#define KGSL_SNAPSHOT_SECTION_MEMLIST      0x0E01
 
 #define KGSL_SNAPSHOT_SECTION_END          0xFFFF
 
@@ -104,17 +103,6 @@
 	int count;  /* Number of dwords in the dump */
 } __packed;
 
-/* Replay or Memory list section, both sections have same header */
-struct kgsl_snapshot_replay_mem_list {
-	/*
-	 * Number of IBs to replay for replay section or
-	 * number of memory list entries for mem list section
-	 */
-	int num_entries;
-	/* Pagetable base to which the replay IBs or memory entries belong */
-	__u32 ptbase;
-} __packed;
-
 /* Indirect buffer sub-section header */
 struct kgsl_snapshot_ib {
 	__u32 gpuaddr; /* GPU address of the the IB */
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
index 0e3e046..813305a 100644
--- a/drivers/gpu/msm/kgsl_sync.c
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -69,6 +69,7 @@
 
 struct kgsl_fence_event_priv {
 	struct kgsl_context *context;
+	unsigned int timestamp;
 };
 
 /**
@@ -85,7 +86,7 @@
 	void *priv, u32 context_id, u32 timestamp)
 {
 	struct kgsl_fence_event_priv *ev = priv;
-	kgsl_sync_timeline_signal(ev->context->timeline, timestamp);
+	kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp);
 	kgsl_context_put(ev->context);
 	kfree(ev);
 }
@@ -125,6 +126,7 @@
 	if (event == NULL)
 		return -ENOMEM;
 	event->context = context;
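+	/* Save the timestamp now; the fence event callback signals this value */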
+	event->timestamp = timestamp;
 	kgsl_context_get(context);
 
 	pt = kgsl_sync_pt_create(context->timeline, timestamp);
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index 5f7ee3c..c54445c 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -24,8 +24,6 @@
 #include <linux/tracepoint.h>
 #include "kgsl_device.h"
 
-#include "adreno_drawctxt.h"
-
 struct kgsl_device;
 struct kgsl_ringbuffer_issueibcmds;
 struct kgsl_device_waittimestamp;
@@ -36,16 +34,11 @@
 TRACE_EVENT(kgsl_issueibcmds,
 
 	TP_PROTO(struct kgsl_device *device,
-			int drawctxt_id,
+			struct kgsl_ringbuffer_issueibcmds *cmd,
 			struct kgsl_ibdesc *ibdesc,
-			int numibs,
-			int timestamp,
-			int flags,
-			int result,
-			unsigned int type),
+			int result),
 
-	TP_ARGS(device, drawctxt_id, ibdesc, numibs, timestamp, flags,
-		result, type),
+	TP_ARGS(device, cmd, ibdesc, result),
 
 	TP_STRUCT__entry(
 		__string(device_name, device->name)
@@ -55,23 +48,21 @@
 		__field(unsigned int, timestamp)
 		__field(unsigned int, flags)
 		__field(int, result)
-		__field(unsigned int, drawctxt_type)
 	),
 
 	TP_fast_assign(
 		__assign_str(device_name, device->name);
-		__entry->drawctxt_id = drawctxt_id;
+		__entry->drawctxt_id = cmd->drawctxt_id;
 		__entry->ibdesc_addr = ibdesc[0].gpuaddr;
-		__entry->numibs = numibs;
-		__entry->timestamp = timestamp;
-		__entry->flags = flags;
+		__entry->numibs = cmd->numibs;
+		__entry->timestamp = cmd->timestamp;
+		__entry->flags = cmd->flags;
 		__entry->result = result;
-		__entry->drawctxt_type = type;
 	),
 
 	TP_printk(
 		"d_name=%s ctx=%u ib=0x%u numibs=%u timestamp=0x%x "
-		"flags=0x%x(%s) result=%d type=%s",
+		"flags=0x%x(%s) result=%d",
 		__get_str(device_name),
 		__entry->drawctxt_id,
 		__entry->ibdesc_addr,
@@ -83,9 +74,7 @@
 			{ KGSL_CONTEXT_SUBMIT_IB_LIST, "IB_LIST" },
 			{ KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" })
 			: "None",
-		__entry->result,
-		__print_symbolic(__entry->drawctxt_type,
-			ADRENO_DRAWCTXT_TYPES)
+		__entry->result
 	)
 );
 
@@ -285,32 +274,6 @@
 	)
 );
 
-TRACE_EVENT(kgsl_gpubusy,
-	TP_PROTO(struct kgsl_device *device, unsigned int busy,
-		unsigned int elapsed),
-
-	TP_ARGS(device, busy, elapsed),
-
-	TP_STRUCT__entry(
-		__string(device_name, device->name)
-		__field(unsigned int, busy)
-		__field(unsigned int, elapsed)
-	),
-
-	TP_fast_assign(
-		__assign_str(device_name, device->name);
-		__entry->busy = busy;
-		__entry->elapsed = elapsed;
-	),
-
-	TP_printk(
-		"d_name=%s busy=%d elapsed=%d",
-		__get_str(device_name),
-		__entry->busy,
-		__entry->elapsed
-	)
-);
-
 DECLARE_EVENT_CLASS(kgsl_pwrstate_template,
 	TP_PROTO(struct kgsl_device *device, unsigned int state),
 
@@ -354,8 +317,6 @@
 		__field(unsigned int, size)
 		__field(unsigned int, tgid)
 		__array(char, usage, 16)
-		__field(unsigned int, id)
-		__field(unsigned int, flags)
 	),
 
 	TP_fast_assign(
@@ -364,76 +325,12 @@
 		__entry->tgid = mem_entry->priv->pid;
 		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
 				     mem_entry->memdesc.flags);
-		__entry->id = mem_entry->id;
-		__entry->flags = mem_entry->memdesc.flags;
 	),
 
 	TP_printk(
-		"gpuaddr=0x%08x size=%d tgid=%d usage=%s id=%d flags=0x%08x",
+		"gpuaddr=0x%08x size=%d tgid=%d usage=%s",
 		__entry->gpuaddr, __entry->size, __entry->tgid,
-		__entry->usage, __entry->id, __entry->flags
-	)
-);
-
-TRACE_EVENT(kgsl_mem_mmap,
-
-	TP_PROTO(struct kgsl_mem_entry *mem_entry),
-
-	TP_ARGS(mem_entry),
-
-	TP_STRUCT__entry(
-		__field(unsigned long, useraddr)
-		__field(unsigned int, gpuaddr)
-		__field(unsigned int, size)
-		__array(char, usage, 16)
-		__field(unsigned int, id)
-		__field(unsigned int, flags)
-	),
-
-	TP_fast_assign(
-		__entry->useraddr = mem_entry->memdesc.useraddr;
-		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
-		__entry->size = mem_entry->memdesc.size;
-		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
-				     mem_entry->memdesc.flags);
-		__entry->id = mem_entry->id;
-		__entry->flags = mem_entry->memdesc.flags;
-	),
-
-	TP_printk(
-		"useraddr=%lx gpuaddr=0x%08x size=%d usage=%s id=%d"
-		" flags=0x%08x",
-		__entry->useraddr, __entry->gpuaddr, __entry->size,
-		__entry->usage, __entry->id, __entry->flags
-	)
-);
-
-TRACE_EVENT(kgsl_mem_unmapped_area_collision,
-
-	TP_PROTO(struct kgsl_mem_entry *mem_entry,
-		 unsigned long hint,
-		 unsigned long len,
-		 unsigned long addr),
-
-	TP_ARGS(mem_entry, hint, len, addr),
-
-	TP_STRUCT__entry(
-		__field(unsigned int, id)
-		__field(unsigned long, hint)
-		__field(unsigned long, len)
-		__field(unsigned long, addr)
-	),
-
-	TP_fast_assign(
-		__entry->id = mem_entry->id;
-		__entry->hint  = hint;
-		__entry->len = len;
-		__entry->addr = addr;
-	),
-
-	TP_printk(
-		"id=%d hint=0x%lx len=%ld addr=0x%lx",
-		__entry->id, __entry->hint, __entry->len, __entry->addr
+		__entry->usage
 	)
 );
 
@@ -450,7 +347,6 @@
 		__field(int, type)
 		__field(unsigned int, tgid)
 		__array(char, usage, 16)
-		__field(unsigned int, id)
 	),
 
 	TP_fast_assign(
@@ -461,14 +357,13 @@
 		__entry->tgid = mem_entry->priv->pid;
 		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
 				     mem_entry->memdesc.flags);
-		__entry->id = mem_entry->id;
 	),
 
 	TP_printk(
-		"gpuaddr=0x%08x size=%d type=%d fd=%d tgid=%d usage=%s id=%d",
+		"gpuaddr=0x%08x size=%d type=%d fd=%d tgid=%d usage=%s",
 		__entry->gpuaddr, __entry->size,
 		__entry->type, __entry->fd, __entry->tgid,
-		__entry->usage, __entry->id
+		__entry->usage
 	)
 );
 
@@ -485,7 +380,6 @@
 		__field(int, fd)
 		__field(unsigned int, tgid)
 		__array(char, usage, 16)
-		__field(unsigned int, id)
 	),
 
 	TP_fast_assign(
@@ -495,47 +389,12 @@
 		__entry->tgid = mem_entry->priv->pid;
 		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
 				     mem_entry->memdesc.flags);
-		__entry->id = mem_entry->id;
 	),
 
 	TP_printk(
-		"gpuaddr=0x%08x size=%d type=%d tgid=%d usage=%s id=%d",
+		"gpuaddr=0x%08x size=%d type=%d tgid=%d usage=%s",
 		__entry->gpuaddr, __entry->size, __entry->type,
-		__entry->tgid, __entry->usage, __entry->id
-	)
-);
-
-TRACE_EVENT(kgsl_mem_sync_cache,
-
-	TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int op),
-
-	TP_ARGS(mem_entry, op),
-
-	TP_STRUCT__entry(
-		__field(unsigned int, gpuaddr)
-		__field(unsigned int, size)
-		__array(char, usage, 16)
-		__field(unsigned int, tgid)
-		__field(unsigned int, id)
-		__field(unsigned int, op)
-	),
-
-	TP_fast_assign(
-		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
-		__entry->size = mem_entry->memdesc.size;
-		__entry->tgid = mem_entry->priv->pid;
-		__entry->id = mem_entry->id;
-		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
-				     mem_entry->memdesc.flags);
-		__entry->op = op;
-	),
-
-	TP_printk(
-		"gpuaddr=0x%08x size=%d tgid=%d usage=%s id=%d op=%c%c",
-		__entry->gpuaddr, __entry->size, __entry->tgid, __entry->usage,
-		__entry->id,
-		(__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.',
-		(__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.'
+		__entry->tgid, __entry->usage
 	)
 );
 
@@ -552,7 +411,6 @@
 		__field(unsigned int, size)
 		__field(int, type)
 		__array(char, usage, 16)
-		__field(unsigned int, id)
 		__field(unsigned int, drawctxt_id)
 		__field(unsigned int, curr_ts)
 		__field(unsigned int, free_ts)
@@ -564,7 +422,6 @@
 		__entry->size = mem_entry->memdesc.size;
 		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
 				     mem_entry->memdesc.flags);
-		__entry->id = mem_entry->id;
 		__entry->drawctxt_id = id;
 		__entry->type = mem_entry->memtype;
 		__entry->curr_ts = curr_ts;
@@ -572,14 +429,13 @@
 	),
 
 	TP_printk(
-		"d_name=%s gpuaddr=0x%08x size=%d type=%d usage=%s id=%d ctx=%u"
+		"d_name=%s gpuaddr=0x%08x size=%d type=%d usage=%s ctx=%u"
 		" curr_ts=0x%x free_ts=0x%x",
 		__get_str(device_name),
 		__entry->gpuaddr,
 		__entry->size,
 		__entry->type,
 		__entry->usage,
-		__entry->id,
 		__entry->drawctxt_id,
 		__entry->curr_ts,
 		__entry->free_ts
@@ -679,31 +535,6 @@
 	)
 );
 
-TRACE_EVENT(kgsl_regwrite,
-
-	TP_PROTO(struct kgsl_device *device, unsigned int offset,
-		unsigned int value),
-
-	TP_ARGS(device, offset, value),
-
-	TP_STRUCT__entry(
-		__string(device_name, device->name)
-		__field(unsigned int, offset)
-		__field(unsigned int, value)
-	),
-
-	TP_fast_assign(
-		__assign_str(device_name, device->name);
-		__entry->offset = offset;
-		__entry->value = value;
-	),
-
-	TP_printk(
-		"d_name=%s reg=%x value=%x",
-		__get_str(device_name), __entry->offset, __entry->value
-	)
-);
-
 TRACE_EVENT(kgsl_register_event,
 		TP_PROTO(unsigned int id, unsigned int timestamp),
 		TP_ARGS(id, timestamp),
diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c
index a07959b..484630d 100644
--- a/drivers/gpu/msm/z180.c
+++ b/drivers/gpu/msm/z180.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -17,7 +17,6 @@
 #include "kgsl.h"
 #include "kgsl_cffdump.h"
 #include "kgsl_sharedmem.h"
-#include "kgsl_trace.h"
 
 #include "z180.h"
 #include "z180_reg.h"
@@ -94,8 +93,7 @@
 #define Z180_CMDWINDOW_TARGET_SHIFT		0
 #define Z180_CMDWINDOW_ADDR_SHIFT		8
 
-static int z180_init(struct kgsl_device *device);
-static int z180_start(struct kgsl_device *device);
+static int z180_start(struct kgsl_device *device, unsigned int init_ram);
 static int z180_stop(struct kgsl_device *device);
 static int z180_wait(struct kgsl_device *device,
 				struct kgsl_context *context,
@@ -214,6 +212,10 @@
 
 			queue_work(device->work_queue, &device->ts_expired_ws);
 			wake_up_interruptible(&device->wait_queue);
+
+			atomic_notifier_call_chain(
+				&(device->ts_notifier_list),
+				device->id, NULL);
 		}
 	}
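
The restored atomic_notifier_call_chain() lets other kernel code observe timestamp retirement without polling the memstore. Since it fires from the interrupt path, the atomic (IRQ-safe) notifier variant is the right one, and subscribers must not sleep. A sketch of a subscriber, assuming direct registration against the device's ts_notifier_list; the names ts_retired_notify/ts_watch_init are illustrative:

#include <linux/notifier.h>
#include <linux/printk.h>

static int ts_retired_notify(struct notifier_block *nb,
			     unsigned long device_id, void *unused)
{
	/* Runs in IRQ context: no sleeping, keep it brief. */
	pr_debug("kgsl device %lu retired a timestamp\n", device_id);
	return NOTIFY_OK;
}

static struct notifier_block ts_retired_nb = {
	.notifier_call = ts_retired_notify,
};

static int ts_watch_init(struct kgsl_device *device)
{
	return atomic_notifier_chain_register(&device->ts_notifier_list,
					      &ts_retired_nb);
}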
 
@@ -246,26 +248,22 @@
 	int result = 0;
 	struct z180_device *z180_dev = Z180_DEVICE(device);
 
-	result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory);
+	result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 
 	if (result)
 		goto error;
 
-	result = kgsl_mmu_map_global(pagetable, &device->memstore);
+	result = kgsl_mmu_map_global(pagetable, &device->memstore,
+				     GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
 	if (result)
 		goto error_unmap_dummy;
 
 	result = kgsl_mmu_map_global(pagetable,
-				     &z180_dev->ringbuffer.cmdbufdesc);
+				     &z180_dev->ringbuffer.cmdbufdesc,
+				     GSL_PT_PAGE_RV);
 	if (result)
 		goto error_unmap_memstore;
-	/*
-	 * Set the mpu end to the last "normal" global memory we use.
-	 * For the IOMMU, this will be used to restrict access to the
-	 * mapped registers.
-	 */
-	device->mh.mpu_range = z180_dev->ringbuffer.cmdbufdesc.gpuaddr +
-				z180_dev->ringbuffer.cmdbufdesc.size;
 	return result;
 
 error_unmap_dummy:
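
GSL_PT_PAGE_RV and GSL_PT_PAGE_WV are the GPU pagetable read-valid / write-valid bits, so the restored third argument states the access policy at map time: the setstate page and memstore stay read/write, while the ring command buffer is mapped GSL_PT_PAGE_RV only. That read-only mapping is what stands in for the KGSL_MEMFLAGS_GPUREADONLY assignment dropped from the ringbuffer init further down in this file. The convention in one place, reusing the calls above:

	/* GPU reads and writes the shared memstore page. */
	kgsl_mmu_map_global(pagetable, &device->memstore,
			    GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
	/* GPU only fetches from the ring: map it read-only. */
	kgsl_mmu_map_global(pagetable, &z180_dev->ringbuffer.cmdbufdesc,
			    GSL_PT_PAGE_RV);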
@@ -321,11 +319,16 @@
 	*p++ = ADDR_VGV3_LAST << 24;
 }
 
-static void z180_cmdstream_start(struct kgsl_device *device)
+static void z180_cmdstream_start(struct kgsl_device *device, int init_ram)
 {
 	struct z180_device *z180_dev = Z180_DEVICE(device);
 	unsigned int cmd = VGV3_NEXTCMD_JUMP << VGV3_NEXTCMD_NEXTCMD_FSHIFT;
 
+	if (init_ram) {
+		z180_dev->timestamp = 0;
+		z180_dev->current_timestamp = 0;
+	}
+
 	addmarker(&z180_dev->ringbuffer, 0);
 
 	z180_cmdwindow_write(device, ADDR_VGV3_MODE, 4);
@@ -484,10 +487,6 @@
 	z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd);
 	z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0);
 error:
-
-	trace_kgsl_issueibcmds(device, context->id, ibdesc, numibs,
-		*timestamp, ctrl, result, 0);
-
 	return (int)result;
 }
 
@@ -496,7 +495,6 @@
 	struct z180_device *z180_dev = Z180_DEVICE(device);
 	memset(&z180_dev->ringbuffer, 0, sizeof(struct z180_ringbuffer));
 	z180_dev->ringbuffer.prevctx = Z180_INVALID_CONTEXT;
-	z180_dev->ringbuffer.cmdbufdesc.flags = KGSL_MEMFLAGS_GPUREADONLY;
 	return kgsl_allocate_contiguous(&z180_dev->ringbuffer.cmdbufdesc,
 		Z180_RB_SIZE);
 }
@@ -553,17 +551,7 @@
 	return 0;
 }
 
-static int z180_init(struct kgsl_device *device)
-{
-	struct z180_device *z180_dev = Z180_DEVICE(device);
-
-	z180_dev->timestamp = 0;
-	z180_dev->current_timestamp = 0;
-
-	return 0;
-}
-
-static int z180_start(struct kgsl_device *device)
+static int z180_start(struct kgsl_device *device, unsigned int init_ram)
 {
 	int status = 0;
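
With the separate .init hook gone from the function table, one-time setup moves back behind the init_ram flag: z180_cmdstream_start() zeroes the timestamp counters only when it is set, so a cold start resets them while a restart after power collapse keeps counting. Illustrative call sites (the real callers live in the core kgsl code, outside this diff):

	z180_start(device, 1);	/* first start: reset timestamps */
	z180_start(device, 0);	/* later restarts: preserve them */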
 
@@ -580,14 +568,11 @@
 	if (status)
 		goto error_clk_off;
 
-	z180_cmdstream_start(device);
+	z180_cmdstream_start(device, init_ram);
 
 	mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
 	device->ftbl->irqctrl(device, 1);
-
-	device->reset_counter++;
-
 	return 0;
 
 error_clk_off:
@@ -826,9 +811,9 @@
 {
 	int status = -EINVAL;
 
-	/* Don't wait forever, set a max of Z180_IDLE_TIMEOUT */
+	/* Don't wait forever; cap the wait at 10 seconds for now */
 	if (msecs == -1)
-		msecs = Z180_IDLE_TIMEOUT;
+		msecs = 10 * MSEC_PER_SEC;
 
 	mutex_unlock(&device->mutex);
 	status = z180_wait(device, context, timestamp, msecs);
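
Dropping device->mutex across the wait keeps a ten-second timeout from stalling every other ioctl on the device. The matching re-lock falls outside the context lines shown, so the full pattern sketched here is an assumption:

	mutex_unlock(&device->mutex);
	status = z180_wait(device, context, timestamp, msecs);
	mutex_lock(&device->mutex);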
@@ -930,7 +915,6 @@
 	.idle = z180_idle,
 	.isidle = z180_isidle,
 	.suspend_context = z180_suspend_context,
-	.init = z180_init,
 	.start = z180_start,
 	.stop = z180_stop,
 	.getproperty = z180_getproperty,
diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h
index 1be0870..268aac3 100644
--- a/drivers/gpu/msm/z180.h
+++ b/drivers/gpu/msm/z180.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -29,7 +29,7 @@
 #define Z180_DEFAULT_PWRSCALE_POLICY  NULL
 
 /* Wait a maximum of 10 seconds when trying to idle the core */
-#define Z180_IDLE_TIMEOUT (20 * 1000)
+#define Z180_IDLE_TIMEOUT (10 * 1000)
 
 struct z180_ringbuffer {
 	unsigned int prevctx;