msm: kgsl: Add GPU snapshot support

Add support for snapshotting the state of the GPU in a binary format. The
snapshot can be pulled from sysfs and parsed off-target.  The advantage is
that far more information can be encapsulated in a snapshot then can be
printed on the console during a hang.  Snapshots are automatically created
during a hang, or can be manually triggered by writing 'trigger'
in /sys/class/kgsl/kgsl-<n>/snapshot where n is either the 2D or 3D device.
Snapshot data can be pulled from 'dump' in the same directory.

Change-Id: Ic0dedbad1c8473b3b109582b95a8771b0d341f05
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index 908b63d..7b8f3e6 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -8,7 +8,8 @@
 	kgsl_pwrscale.o \
 	kgsl_mmu.o \
 	kgsl_gpummu.o \
-	kgsl_iommu.o
+	kgsl_iommu.o \
+	kgsl_snapshot.o
 
 msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
 msm_kgsl_core-$(CONFIG_MSM_KGSL_CFF_DUMP) += kgsl_cffdump.o
@@ -20,8 +21,10 @@
 	adreno_ringbuffer.o \
 	adreno_drawctxt.o \
 	adreno_postmortem.o \
+	adreno_snapshot.o \
 	adreno_a2xx.o \
 	adreno_a2xx_trace.o \
+	adreno_a2xx_snapshot.o \
 	adreno.o
 
 msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o
diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h
index 5ffdea1..50b2745 100644
--- a/drivers/gpu/msm/a2xx_reg.h
+++ b/drivers/gpu/msm/a2xx_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -416,4 +416,37 @@
 #define REG_A225_GRAS_UCP5W              0x2357
 #define REG_A225_GRAS_UCP_ENABLED        0x2360
 
+/* Debug registers used by snapshot */
+#define REG_PA_SU_DEBUG_CNTL            0x0C80
+#define REG_PA_SU_DEBUG_DATA            0x0C81
+#define REG_RB_DEBUG_CNTL               0x0F26
+#define REG_RB_DEBUG_DATA               0x0F27
+#define REG_PC_DEBUG_CNTL               0x0C38
+#define REG_PC_DEBUG_DATA               0x0C39
+#define REG_GRAS_DEBUG_CNTL             0x0C80
+#define REG_GRAS_DEBUG_DATA             0x0C81
+#define REG_SQ_DEBUG_MISC               0x0D05
+#define REG_SQ_DEBUG_INPUT_FSM          0x0DAE
+#define REG_SQ_DEBUG_CONST_MGR_FSM      0x0DAF
+#define REG_SQ_DEBUG_EXP_ALLOC          0x0DB3
+#define REG_SQ_DEBUG_FSM_ALU_0          0x0DB1
+#define REG_SQ_DEBUG_FSM_ALU_1          0x0DB2
+#define REG_SQ_DEBUG_PTR_BUFF           0x0DB4
+#define REG_SQ_DEBUG_GPR_VTX            0x0DB5
+#define REG_SQ_DEBUG_GPR_PIX            0x0DB6
+#define REG_SQ_DEBUG_TB_STATUS_SEL      0x0DB7
+#define REG_SQ_DEBUG_VTX_TB_0           0x0DB8
+#define REG_SQ_DEBUG_VTX_TB_1           0x0DB9
+#define REG_SQ_DEBUG_VTX_TB_STATE_MEM   0x0DBB
+#define REG_SQ_DEBUG_TP_FSM             0x0DB0
+#define REG_SQ_DEBUG_VTX_TB_STATUS_REG  0x0DBA
+#define REG_SQ_DEBUG_PIX_TB_0           0x0DBC
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_0 0x0DBD
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_1 0x0DBE
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_2 0x0DBF
+#define REG_SQ_DEBUG_PIX_TB_STATUS_REG_3 0x0DC0
+#define REG_SQ_DEBUG_PIX_TB_STATE_MEM   0x0DC1
+#define REG_SQ_DEBUG_MISC_0             0x2309
+#define REG_SQ_DEBUG_MISC_1             0x230A
+
 #endif /* __A200_REG_H */
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 03e447b..c9f73b5 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -728,8 +728,21 @@
 	} else {
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_RECOVER);
 		INIT_COMPLETION(device->recovery_gate);
-		/* Detected a hang - trigger an automatic dump */
+		/* Detected a hang */
+
+
+		/*
+		 * Trigger an automatic dump of the state to
+		 * the console
+		 */
 		adreno_postmortem_dump(device, 0);
+
+		/*
+		 * Make a GPU snapshot.  For now, do it after the PM dump so we
+		 * can at least be sure the PM dump will work as it always has
+		 */
+		kgsl_device_snapshot(device, 1);
+
 		result = adreno_recover_hang(device);
 		if (result)
 			kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
@@ -1341,6 +1354,7 @@
 	.power_stats = adreno_power_stats,
 	.irqctrl = adreno_irqctrl,
 	.gpuid = adreno_gpuid,
+	.snapshot = adreno_snapshot,
 	/* Optional functions */
 	.setstate = adreno_setstate,
 	.drawctxt_create = adreno_drawctxt_create,
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index a3d2c00..4376ebe 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -84,6 +84,7 @@
 	void (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
 	irqreturn_t (*irq_handler)(struct adreno_device *);
 	void (*irq_control)(struct adreno_device *, int);
+	void * (*snapshot)(struct adreno_device *, void *, int *, int);
 };
 
 extern struct adreno_gpudev adreno_a2xx_gpudev;
@@ -108,6 +109,9 @@
 uint8_t *adreno_convertaddr(struct kgsl_device *device,
 	unsigned int pt_base, unsigned int gpuaddr, unsigned int size);
 
+void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
+		int hang);
+
 static inline int adreno_is_a200(struct adreno_device *adreno_dev)
 {
 	return (adreno_dev->gpurev == ADRENO_REV_A200);
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index 16402c3..6234104 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1704,6 +1704,10 @@
 	wmb();
 }
 
+/* Defined in adreno_a2xx_snapshot.c */
+void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
+	int *remain, int hang);
+
 struct adreno_gpudev adreno_a2xx_gpudev = {
 	.ctxt_gpustate_shadow = a2xx_ctxt_gpustate_shadow,
 	.ctxt_gmem_shadow = a2xx_ctxt_gmem_shadow,
@@ -1711,4 +1715,5 @@
 	.ctxt_restore = a2xx_ctxt_restore,
 	.irq_handler = a2xx_irq_handler,
 	.irq_control = a2xx_irq_control,
+	.snapshot = a2xx_snapshot,
 };
diff --git a/drivers/gpu/msm/adreno_a2xx_snapshot.c b/drivers/gpu/msm/adreno_a2xx_snapshot.c
new file mode 100644
index 0000000..30d692b
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a2xx_snapshot.c
@@ -0,0 +1,356 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_snapshot.h"
+
+#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
+		+ sizeof(struct kgsl_snapshot_debug))
+
+/* Dump the SX debug registers into a GPU snapshot debug section */
+
+#define SXDEBUG_COUNT 0x1B
+
+static int a2xx_snapshot_sxdebug(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i;
+
+	if (remain < DEBUG_SECTION_SZ(SXDEBUG_COUNT)) {
+		SNAPSHOT_ERR_NOMEM(device, "SX DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SX;
+	header->size = SXDEBUG_COUNT;
+
+	for (i = 0; i < SXDEBUG_COUNT; i++) {
+		adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0x1B00 | i);
+		adreno_regread(device, REG_RBBM_DEBUG_OUT, &data[i]);
+	}
+
+	adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0);
+
+	return DEBUG_SECTION_SZ(SXDEBUG_COUNT);
+}
+
+#define CPDEBUG_COUNT 0x20
+
+static int a2xx_snapshot_cpdebug(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i;
+
+	if (remain < DEBUG_SECTION_SZ(CPDEBUG_COUNT)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP;
+	header->size = CPDEBUG_COUNT;
+
+	for (i = 0; i < CPDEBUG_COUNT; i++) {
+		adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0x1628);
+		adreno_regread(device, REG_RBBM_DEBUG_OUT, &data[i]);
+	}
+
+	adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0);
+
+	return DEBUG_SECTION_SZ(CPDEBUG_COUNT);
+}
+
+/*
+ * The contents of the SQ debug sections are dword pairs:
+ * [register offset]:[value]
+ * This macro writes both dwords for the given register
+ */
+
+#define SQ_DEBUG_WRITE(_device, _reg, _data, _offset) \
+	do { _data[(_offset)++] = (_reg); \
+	adreno_regread(_device, (_reg), &_data[(_offset)++]); } while (0)
+
+#define SQ_DEBUG_BANK_SIZE 23
+
+static int a2xx_snapshot_sqdebug(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i, offset = 0;
+	int size = SQ_DEBUG_BANK_SIZE * 2 * 2;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "SQ Debug");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SQ;
+	header->size = size;
+
+	for (i = 0; i < 2; i++) {
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_CONST_MGR_FSM+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_EXP_ALLOC+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_FSM_ALU_0+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_FSM_ALU_1+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_GPR_PIX+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_GPR_VTX+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_INPUT_FSM+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_MISC+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_MISC_0+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_MISC_1+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_0+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATE_MEM+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device,
+			REG_SQ_DEBUG_PIX_TB_STATUS_REG_0+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device,
+			REG_SQ_DEBUG_PIX_TB_STATUS_REG_1+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device,
+			REG_SQ_DEBUG_PIX_TB_STATUS_REG_2+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device,
+			REG_SQ_DEBUG_PIX_TB_STATUS_REG_3+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PTR_BUFF+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_TB_STATUS_SEL+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_TP_FSM+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_VTX_TB_0+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_VTX_TB_1+i*0x1000,
+			data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_VTX_TB_STATE_MEM+i*0x1000,
+			data, offset);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+#define SQ_DEBUG_THREAD_SIZE 7
+
+static int a2xx_snapshot_sqthreaddebug(struct kgsl_device *device,
+	void *snapshot, int remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i, offset = 0;
+	int size = SQ_DEBUG_THREAD_SIZE * 2 * 16;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "SQ THREAD DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SQTHREAD;
+	header->size = size;
+
+	for (i = 0; i < 16; i++) {
+		adreno_regwrite(device, REG_SQ_DEBUG_TB_STATUS_SEL,
+				i | (6<<4) | (i<<7) | (1<<11) | (1<<12)
+				| (i<<16) | (6<<20) | (i<<23));
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_VTX_TB_STATE_MEM,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_VTX_TB_STATUS_REG,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATE_MEM,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATUS_REG_0,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATUS_REG_1,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATUS_REG_2,
+			 data, offset);
+		SQ_DEBUG_WRITE(device, REG_SQ_DEBUG_PIX_TB_STATUS_REG_3,
+			 data, offset);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+#define MIUDEBUG_COUNT 0x10
+
+static int a2xx_snapshot_miudebug(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i;
+
+	if (remain < DEBUG_SECTION_SZ(MIUDEBUG_COUNT)) {
+		SNAPSHOT_ERR_NOMEM(device, "MIU DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_MIU;
+	header->size = MIUDEBUG_COUNT;
+
+	for (i = 0; i < MIUDEBUG_COUNT; i++) {
+		adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0x1600 | i);
+		adreno_regread(device, REG_RBBM_DEBUG_OUT, &data[i]);
+	}
+
+	adreno_regwrite(device, REG_RBBM_DEBUG_CNTL, 0);
+
+	return DEBUG_SECTION_SZ(MIUDEBUG_COUNT);
+}
+
+/* Helper function to snapshot a section of indexed registers */
+
+static void *a2xx_snapshot_indexed_registers(struct kgsl_device *device,
+		void *snapshot, int *remain,
+		unsigned int index, unsigned int data, unsigned int start,
+		unsigned int count)
+{
+	struct kgsl_snapshot_indexed_registers iregs;
+	iregs.index = index;
+	iregs.data = data;
+	iregs.start = start;
+	iregs.count = count;
+
+	return kgsl_snapshot_add_section(device,
+		 KGSL_SNAPSHOT_SECTION_INDEXED_REGS, snapshot,
+		 remain, kgsl_snapshot_dump_indexed_regs, &iregs);
+}
+
+/* A2XX GPU snapshot function - this is where all of the A2XX specific
+ * bits and pieces are grabbed into the snapshot memory
+ */
+
+void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
+	int *remain, int hang)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct kgsl_snapshot_registers regs;
+	unsigned int pmoverride;
+
+	/* Choose the register set to dump */
+
+	if (adreno_is_a20x(adreno_dev)) {
+		regs.regs = (unsigned int *) a200_registers;
+		regs.count = a200_registers_count;
+	} else {
+		regs.regs = (unsigned int *) a220_registers;
+		regs.count = a220_registers_count;
+	}
+
+	/* Master set of (non debug) registers */
+	snapshot = kgsl_snapshot_add_section(device,
+		KGSL_SNAPSHOT_SECTION_REGS, snapshot, remain,
+		kgsl_snapshot_dump_regs, &regs);
+
+	/* CP_STATE_DEBUG indexed registers */
+	snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_CP_STATE_DEBUG_INDEX,
+			REG_CP_STATE_DEBUG_DATA, 0x0, 0x14);
+
+	/* CP_ME indexed registers */
+	snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_CP_ME_CNTL, REG_CP_ME_STATUS,
+			64, 44);
+
+	/*
+	 * Need to temporarily turn off clock gating for the debug bus to
+	 * work
+	 */
+
+	adreno_regread(device, REG_RBBM_PM_OVERRIDE2, &pmoverride);
+	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xFF);
+
+	/* SX debug registers */
+	snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+			a2xx_snapshot_sxdebug, NULL);
+
+	/* SU debug indexed registers (only for < 470) */
+	if (!adreno_is_a22x(adreno_dev))
+		snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+				remain, REG_PA_SU_DEBUG_CNTL,
+				REG_PA_SU_DEBUG_DATA,
+				0, 0x1B);
+
+	/* CP debug registers */
+	snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+			a2xx_snapshot_cpdebug, NULL);
+
+	/* MH debug indexed registers */
+	snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, MH_DEBUG_CTRL, MH_DEBUG_DATA, 0x0, 0x40);
+
+	/* Leia only register sets */
+	if (adreno_is_a22x(adreno_dev)) {
+		/* RB DEBUG indexed regisers */
+		snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_RB_DEBUG_CNTL, REG_RB_DEBUG_DATA, 0, 8);
+
+		/* RB DEBUG indexed registers bank 2 */
+		snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_RB_DEBUG_CNTL, REG_RB_DEBUG_DATA + 0x1000,
+			0, 8);
+
+		/* PC_DEBUG indexed registers */
+		snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_PC_DEBUG_CNTL, REG_PC_DEBUG_DATA, 0, 8);
+
+		/* GRAS_DEBUG indexed registers */
+		snapshot = a2xx_snapshot_indexed_registers(device, snapshot,
+			remain, REG_GRAS_DEBUG_CNTL, REG_GRAS_DEBUG_DATA, 0, 4);
+
+		/* MIU debug registers */
+		snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+			a2xx_snapshot_miudebug, NULL);
+
+		/* SQ DEBUG debug registers */
+		snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+			a2xx_snapshot_sqdebug, NULL);
+
+		/*
+		 * Reading SQ THREAD causes bad things to happen on a running
+		 * system, so only read it if the GPU is already hung
+		 */
+
+		if (hang) {
+			/* SQ THREAD debug registers */
+			snapshot = kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+				a2xx_snapshot_sqthreaddebug, NULL);
+		}
+	}
+
+	/* Reset the clock gating */
+	adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, pmoverride);
+
+	return snapshot;
+}
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
new file mode 100644
index 0000000..fb88a72
--- /dev/null
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -0,0 +1,332 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_snapshot.h"
+
+#include "adreno.h"
+#include "adreno_pm4types.h"
+#include "a2xx_reg.h"
+
+/* Number of dwords of ringbuffer history to record */
+#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100
+
+/* Maintain a list of the objects we see during parsing */
+
+#define SNAPSHOT_OBJ_BUFSIZE 64
+
+#define SNAPSHOT_OBJ_TYPE_IB 0
+
+static struct kgsl_snapshot_obj {
+	int type;
+	uint32_t gpuaddr;
+	uint32_t ptbase;
+	void *ptr;
+	int dwords;
+} objbuf[SNAPSHOT_OBJ_BUFSIZE];
+
+/* Pointer to the next open entry in the object list */
+static int objbufptr;
+
+/* Push a new buffer object onto the list */
+static void push_object(struct kgsl_device *device, int type, uint32_t ptbase,
+	uint32_t gpuaddr, int dwords)
+{
+	int index;
+	void *ptr;
+
+	/* Go through the list and see that object has already been seen */
+	for (index = 0; index < objbufptr; index++) {
+		if (objbuf[index].gpuaddr == gpuaddr &&
+			objbuf[index].ptbase == ptbase)
+			return;
+	}
+
+	if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) {
+		KGSL_DRV_ERR(device, "snapshot: too many snapshot objects\n");
+		return;
+	}
+
+	/*
+	 * adreno_convertaddr verifies that the IB size is valid - at least in
+	 * the context of it being smaller then the allocated memory space
+	 */
+	ptr = adreno_convertaddr(device, ptbase, gpuaddr, dwords << 2);
+
+	if (ptr == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Can't find GPU address for %x\n", gpuaddr);
+		return;
+	}
+
+	/* Put it on the list of things to parse */
+	objbuf[objbufptr].type = type;
+	objbuf[objbufptr].gpuaddr = gpuaddr;
+	objbuf[objbufptr].ptbase = ptbase;
+	objbuf[objbufptr].dwords = dwords;
+	objbuf[objbufptr++].ptr = ptr;
+}
+
+/* Snapshot the istore memory */
+static int snapshot_istore(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_istore *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int count, i;
+
+	count = adreno_dev->istore_size * ADRENO_ISTORE_WORDS;
+
+	if (remain < (count * 4) + sizeof(*header)) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Not enough memory for the istore section");
+		return 0;
+	}
+
+	header->count = adreno_dev->istore_size;
+
+	for (i = 0; i < count; i++)
+		kgsl_regread(device, ADRENO_ISTORE_START + i, &data[i]);
+
+	return (count * 4) + sizeof(*header);
+}
+
+/* Snapshot the ringbuffer memory */
+static int snapshot_rb(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_rb *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
+	unsigned int rbbase, ptbase, rptr, *rbptr;
+	int start, stop, index;
+	int numitems, size;
+
+	/* Get the GPU address of the ringbuffer */
+	kgsl_regread(device, REG_CP_RB_BASE, &rbbase);
+
+	/* Get the physical address of the MMU pagetable */
+	ptbase = kgsl_mmu_get_current_ptbase(device);
+
+	/* Get the current read pointers for the RB */
+	kgsl_regread(device, REG_CP_RB_RPTR, &rptr);
+
+	/* start the dump at the rptr minus some history */
+	start = (int) rptr - NUM_DWORDS_OF_RINGBUFFER_HISTORY;
+	if (start < 0)
+		start += rb->sizedwords;
+
+	/*
+	 * Stop the dump at the point where the software last wrote.  Don't use
+	 * the hardware value here on the chance that it didn't get properly
+	 * updated
+	 */
+
+	stop = (int) rb->wptr + 16;
+	if (stop > rb->sizedwords)
+		stop -= rb->sizedwords;
+
+	/* Set up the header for the section */
+
+	numitems = (stop > start) ? stop - start :
+		(rb->sizedwords - start) + stop;
+
+	size = (numitems << 2);
+
+	if (remain < size + sizeof(*header)) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Not enough memory for the rb section");
+		return 0;
+	}
+
+	/* Write the sub-header for the section */
+	header->start = start;
+	header->end = stop;
+	header->wptr = rb->wptr;
+	header->rbsize = rb->sizedwords;
+	header->count = numitems;
+
+	index = start;
+	rbptr = rb->buffer_desc.hostptr;
+
+	/*
+	 * Loop through the RB, copying the data and looking for indirect
+	 * buffers and MMU pagetable changes
+	 */
+
+	while (index != rb->wptr) {
+		*data = rbptr[index];
+
+		if (rbptr[index] == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2))
+			push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
+				rbptr[index + 1], rbptr[index + 2]);
+
+		/*
+		 * FIXME: Handle upcoming MMU pagetable changes, but only
+		 * between the rptr and the wptr
+		 */
+
+		index = index + 1;
+
+		if (index == rb->sizedwords)
+			index = 0;
+
+		data++;
+	}
+
+	/* Dump 16 dwords past the wptr, but don't  bother interpeting it */
+
+	while (index != stop) {
+		*data = rbptr[index];
+		index = index + 1;
+
+		if (index == rb->sizedwords)
+			index = 0;
+
+		data++;
+	}
+
+	/* Return the size of the section */
+	return size + sizeof(*header);
+}
+
+/* Snapshot the memory for an indirect buffer */
+static int snapshot_ib(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_ib *header = snapshot;
+	struct kgsl_snapshot_obj *obj = priv;
+	unsigned int *src = obj->ptr;
+	unsigned int *dst = snapshot + sizeof(*header);
+	int i;
+
+	if (remain < (obj->dwords << 2) + sizeof(*header)) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Not enough memory for the ib section");
+		return 0;
+	}
+
+	/* Write the sub-header for the section */
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase = obj->ptbase;
+	header->size = obj->dwords;
+
+	/* Write the contents of the ib */
+	for (i = 0; i < obj->dwords; i++) {
+		*dst = *src;
+		/* If another IB is discovered, then push it on the list too */
+
+		if (*src == cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)) {
+			push_object(device, SNAPSHOT_OBJ_TYPE_IB, obj->ptbase,
+				*(src + 1), *(src + 2));
+		}
+
+		src++;
+		dst++;
+	}
+
+	return (obj->dwords << 2) + sizeof(*header);
+}
+
+/* Dump another item on the current pending list */
+static void *dump_object(struct kgsl_device *device, int obj, void *snapshot,
+	int *remain)
+{
+	switch (objbuf[obj].type) {
+	case SNAPSHOT_OBJ_TYPE_IB:
+		snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_IB, snapshot, remain,
+			snapshot_ib, &objbuf[obj]);
+		break;
+	default:
+		KGSL_DRV_ERR(device,
+			"snapshot: Invalid snapshot object type: %d\n",
+			objbuf[obj].type);
+		break;
+	}
+
+	return snapshot;
+}
+
+/* adreno_snapshot - Snapshot the Adreno GPU state
+ * @device - KGSL device to snapshot
+ * @snapshot - Pointer to the start of memory to write into
+ * @remain - A pointer to how many bytes of memory are remaining in the snapshot
+ * @hang - set if this snapshot was automatically triggered by a GPU hang
+ * This is a hook function called by kgsl_snapshot to snapshot the
+ * Adreno specific information for the GPU snapshot.  In turn, this function
+ * calls the GPU specific snapshot function to get core specific information.
+ */
+
+void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
+		int hang)
+{
+	int i;
+	uint32_t ptbase, ibbase, ibsize;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Reset the list of objects */
+	objbufptr = 0;
+
+	/* Get the physical address of the MMU pagetable */
+	ptbase = kgsl_mmu_get_current_ptbase(device);
+
+	/* Dump the ringbuffer */
+	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB,
+		snapshot, remain, snapshot_rb, NULL);
+
+	/*
+	 * Make sure that the IBs described in the CP registers are on the
+	 * list of objects
+	 */
+	kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);
+	kgsl_regread(device, REG_CP_IB1_BUFSZ, &ibsize);
+
+	if (ibsize)
+		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
+			ibbase, ibsize);
+
+	kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
+	kgsl_regread(device, REG_CP_IB2_BUFSZ, &ibsize);
+
+	if (ibsize)
+		push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
+			ibbase, ibsize);
+
+	/*
+	 * Go through the list of found objects and dump each one.  As the IBs
+	 * are parsed, more objects might be found, and objbufptr will increase
+	 */
+	for (i = 0; i < objbufptr; i++)
+		snapshot = dump_object(device, i, snapshot, remain);
+
+	/*
+	 * Only dump the istore on a hang - reading it on a running system
+	 * has a non 0 chance of hanging the GPU
+	 */
+
+	if (hang) {
+		snapshot = kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_ISTORE, snapshot, remain,
+			snapshot_istore, NULL);
+	}
+
+	/* Add GPU specific sections - registers mainly, but other stuff too */
+	if (adreno_dev->gpudev->snapshot)
+		snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
+			remain, hang);
+
+	return snapshot;
+}
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index c6e78dd..1b6696b 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -2097,6 +2097,8 @@
 	if (minor == KGSL_DEVICE_MAX)
 		return;
 
+	kgsl_device_snapshot_close(device);
+
 	kgsl_cffdump_close(device->id);
 	kgsl_pwrctrl_uninit_sysfs(device);
 
@@ -2199,6 +2201,9 @@
 
 	idr_init(&device->context_idr);
 
+	/* Initalize the snapshot engine */
+	kgsl_device_snapshot_init(device);
+
 	/* sysfs and debugfs initalization - failure here is non fatal */
 
 	/* Initialize logging */
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index f82b038..8ea5279 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -90,6 +90,8 @@
 		struct kgsl_power_stats *stats);
 	void (*irqctrl)(struct kgsl_device *device, int state);
 	unsigned int (*gpuid)(struct kgsl_device *device);
+	void * (*snapshot)(struct kgsl_device *device, void *snapshot,
+		int *remain, int hang);
 	/* Optional functions - these functions are not mandatory.  The
 	   driver will check that the function pointer is not NULL before
 	   calling the hook */
@@ -164,6 +166,15 @@
 	struct idr context_idr;
 	struct early_suspend display_off;
 
+	void *snapshot;		/* Pointer to the snapshot memory region */
+	int snapshot_maxsize;   /* Max size of the snapshot region */
+	int snapshot_size;      /* Current size of the snapshot region */
+	u32 snapshot_timestamp;	/* Timestamp of the last valid snapshot */
+	int snapshot_frozen;	/* 1 if the snapshot output is frozen until
+				   it gets read by the user.  This avoids
+				   losing the output on multiple hangs  */
+	struct kobject snapshot_kobj;
+
 	/* Logging levels */
 	int cmd_log;
 	int ctxt_log;
@@ -320,4 +331,8 @@
 
 const char *kgsl_pwrstate_to_str(unsigned int state);
 
+int kgsl_device_snapshot_init(struct kgsl_device *device);
+int kgsl_device_snapshot(struct kgsl_device *device, int hang);
+void kgsl_device_snapshot_close(struct kgsl_device *device);
+
 #endif  /* __KGSL_DEVICE_H */
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
new file mode 100644
index 0000000..72df148b
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -0,0 +1,484 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/vmalloc.h>
+#include <linux/time.h>
+#include <linux/sysfs.h>
+#include <linux/utsname.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+
+#include "kgsl.h"
+#include "kgsl_log.h"
+#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_snapshot.h"
+
+/* idr_for_each function to count the number of contexts */
+
+static int snapshot_context_count(int id, void *ptr, void *data)
+{
+	int *count = data;
+	*count = *count + 1;
+
+	return 0;
+}
+
+/*
+ * To simplify the iterator loop use a global pointer instead of trying
+ * to pass around double star references to the snapshot data
+ */
+
+static void *_ctxtptr;
+
+static int snapshot_context_info(int id, void *ptr, void *data)
+{
+	struct kgsl_snapshot_linux_context *header = _ctxtptr;
+	struct kgsl_context *context = ptr;
+	struct kgsl_device *device = context->dev_priv->device;
+
+	header->id = id;
+
+	/* Future-proof for per-context timestamps - for now, just
+	 * return the global timestamp for all contexts
+	 */
+
+	header->timestamp_queued = -1;
+	header->timestamp_retired = device->ftbl->readtimestamp(device,
+		KGSL_TIMESTAMP_RETIRED);
+
+	_ctxtptr += sizeof(struct kgsl_snapshot_linux_context);
+
+	return 0;
+}
+
+/* Snapshot the Linux specific information */
+static int snapshot_os(struct kgsl_device *device,
+	void *snapshot, int remain, void *priv)
+{
+	struct kgsl_snapshot_linux *header = snapshot;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct task_struct *task;
+	pid_t pid;
+	int hang = (int) priv;
+	int ctxtcount = 0;
+	int size = sizeof(*header);
+
+	/* Figure out how many active contexts there are - these will
+	 * be appended on the end of the structure */
+
+	idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
+
+	size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context);
+
+	/* Make sure there is enough room for the data */
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "OS");
+		return 0;
+	}
+
+	memset(header, 0, sizeof(*header));
+
+	header->osid = KGSL_SNAPSHOT_OS_LINUX;
+
+	header->state = hang ? SNAPSHOT_STATE_HUNG : SNAPSHOT_STATE_RUNNING;
+
+	/* Get the kernel build information */
+	strlcpy(header->release, utsname()->release, sizeof(header->release));
+	strlcpy(header->version, utsname()->version, sizeof(header->version));
+
+	/* Get the Unix time for the timestamp */
+	header->seconds = get_seconds();
+
+	/* Remember the power information */
+	header->power_flags = pwr->power_flags;
+	header->power_level = pwr->active_pwrlevel;
+	header->power_interval_timeout = pwr->interval_timeout;
+	header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
+	header->busclk = kgsl_get_clkrate(pwr->ebi1_clk);
+
+	/* Future proof for per-context timestamps */
+	header->current_context = -1;
+
+	/* Get the current PT base */
+	header->ptbase = kgsl_mmu_get_current_ptbase(device);
+	/* And the PID for the task leader */
+	pid = header->pid = kgsl_mmu_get_ptname_from_ptbase(header->ptbase);
+
+	task = find_task_by_vpid(pid);
+
+	if (task)
+		get_task_comm(header->comm, task);
+
+	header->ctxtcount = ctxtcount;
+
+	/* append information for each context */
+	_ctxtptr = snapshot + sizeof(*header);
+	idr_for_each(&device->context_idr, snapshot_context_info, NULL);
+
+	/* Return the size of the data segment */
+	return size;
+}
+/*
+ * kgsl_snapshot_dump_indexed_regs - helper function to dump indexed registers
+ * @device - the device to dump registers from
+ * @snapshot - pointer to the start of the region of memory for the snapshot
+ * @remain - a pointer to the number of bytes remaining in the snapshot
+ * @priv - A pointer to the kgsl_snapshot_indexed_registers data
+ *
+ * Given a indexed register cmd/data pair and a count, dump each indexed
+ * register
+ */
+
+int kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device,
+	void *snapshot, int remain, void *priv)
+{
+	struct kgsl_snapshot_indexed_registers *iregs = priv;
+	struct kgsl_snapshot_indexed_regs *header = snapshot;
+	unsigned int *data = snapshot + sizeof(*header);
+	int i;
+
+	if (remain < (iregs->count * 4) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS");
+		return 0;
+	}
+
+	header->index_reg = iregs->index;
+	header->data_reg = iregs->data;
+	header->count = iregs->count;
+	header->start = iregs->start;
+
+	for (i = 0; i < iregs->count; i++) {
+		kgsl_regwrite(device, iregs->index, iregs->start + i);
+		kgsl_regread(device, iregs->data, &data[i]);
+	}
+
+	return (iregs->count * 4) + sizeof(*header);
+}
+EXPORT_SYMBOL(kgsl_snapshot_dump_indexed_regs);
+
+/*
+ * kgsl_snapshot_dump_regs - helper function to dump device registers
+ * @device - the device to dump registers from
+ * @snapshot - pointer to the start of the region of memory for the snapshot
+ * @remain - a pointer to the number of bytes remaining in the snapshot
+ * @priv - A pointer to the kgsl_snapshot_registers data
+ *
+ * Given an array of register ranges pairs (start,end [inclusive]), dump the
+ * registers into a snapshot register section.  The snapshot region stores a
+ * part of dwords for each register - the word address of the register, and
+ * the value.
+ */
+int kgsl_snapshot_dump_regs(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = snapshot;
+	struct kgsl_snapshot_registers *regs = priv;
+	unsigned int *data = snapshot + sizeof(*header);
+	int count = 0, i, j;
+
+	/* Figure out how many registers we are going to dump */
+
+	for (i = 0; i < regs->count; i++) {
+		int start = regs->regs[i * 2];
+		int end = regs->regs[i * 2 + 1];
+
+		count += (end - start + 1);
+	}
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	for (i = 0; i < regs->count; i++) {
+		unsigned int start = regs->regs[i * 2];
+		unsigned int end = regs->regs[i * 2 + 1];
+
+		for (j = start; j <= end; j++) {
+			unsigned int val;
+
+			kgsl_regread(device, j, &val);
+			*data++ = j;
+			*data++ = val;
+		}
+	}
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+EXPORT_SYMBOL(kgsl_snapshot_dump_regs);
+
+/*
+ * kgsl_snapshot - construct a device snapshot
+ * @device - device to snapshot
+ * @hang - set to 1 if the snapshot was triggered following a hnag
+ * Given a device, construct a binary snapshot dump of the current device state
+ * and store it in the device snapshot memory.
+ */
+int kgsl_device_snapshot(struct kgsl_device *device, int hang)
+{
+	struct kgsl_snapshot_header *header = device->snapshot;
+	int remain = device->snapshot_maxsize - sizeof(*header);
+	void *snapshot;
+
+	/*
+	 * The first hang is always the one we are interested in. To
+	 * avoid a subsequent hang blowing away the first, the snapshot
+	 * is frozen until it is dumped via sysfs.
+	 *
+	 * Note that triggered snapshots are always taken regardless
+	 * of the state and never frozen.
+	 */
+
+	if (hang && device->snapshot_frozen == 1)
+		return 0;
+
+	if (device->snapshot == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: No snapshot memory available\n");
+		return -ENOMEM;
+	}
+
+	if (remain < sizeof(*header)) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Not enough memory for the header\n");
+		return -ENOMEM;
+	}
+
+	header->magic = SNAPSHOT_MAGIC;
+
+	header->gpuid = kgsl_gpuid(device);
+
+	/* Get a pointer to the first section (right after the header) */
+	snapshot = ((void *) device->snapshot) + sizeof(*header);
+
+	/* Build the Linux specific header */
+	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
+		snapshot, &remain, snapshot_os, (void *) hang);
+
+	/* Get the device specific sections */
+	if (device->ftbl->snapshot)
+		snapshot = device->ftbl->snapshot(device, snapshot, &remain,
+			hang);
+
+	/* Add the empty end section to let the parser know we are done */
+	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_END,
+		snapshot, &remain, NULL, NULL);
+
+	device->snapshot_timestamp = get_seconds();
+	device->snapshot_size = (int) (snapshot - device->snapshot);
+
+	/* Freeze the snapshot on a hang until it gets read */
+	device->snapshot_frozen = (hang) ? 1 : 0;
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_device_snapshot);
+
+/* An attribute for showing snapshot details */
+struct kgsl_snapshot_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kgsl_device *device, char *buf);
+	ssize_t (*store)(struct kgsl_device *device, const char *buf,
+		size_t count);
+};
+
+#define to_snapshot_attr(a) \
+container_of(a, struct kgsl_snapshot_attribute, attr)
+
+#define kobj_to_device(a) \
+container_of(a, struct kgsl_device, snapshot_kobj)
+
+/* Dump the sysfs binary data to the user */
+static ssize_t snapshot_show(struct file *filep, struct kobject *kobj,
+	struct bin_attribute *attr, char *buf, loff_t off,
+	size_t count)
+{
+	struct kgsl_device *device = kobj_to_device(kobj);
+
+	if (device == NULL)
+		return 0;
+
+	/* Return nothing if we haven't taken a snapshot yet */
+	if (device->snapshot_timestamp == 0)
+		return 0;
+
+	/* Get the mutex to keep things from changing while we are dumping */
+	mutex_lock(&device->mutex);
+
+	/*
+	 * Release the freeze on the snapshot the first time the buffer is read
+	 */
+
+	device->snapshot_frozen = 0;
+
+	if (off >= device->snapshot_size) {
+		count = 0;
+		goto exit;
+	}
+
+	if (off + count > device->snapshot_size)
+		count = device->snapshot_size - off;
+
+	memcpy(buf, device->snapshot + off, count);
+
+exit:
+	mutex_unlock(&device->mutex);
+	return count;
+}
+
+/* Show the timestamp of the last collected snapshot */
+static ssize_t timestamp_show(struct kgsl_device *device, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%x\n", device->snapshot_timestamp);
+}
+
+/* manually trigger a new snapshot to be collected */
+static ssize_t trigger_store(struct kgsl_device *device, const char *buf,
+	size_t count)
+{
+	if (device && count > 0) {
+		mutex_lock(&device->mutex);
+		kgsl_device_snapshot(device, 0);
+		mutex_unlock(&device->mutex);
+	}
+
+	return count;
+}
+
+static struct bin_attribute snapshot_attr = {
+	.attr.name = "dump",
+	.attr.mode = 0444,
+	.size = 0,
+	.read = snapshot_show
+};
+
+#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \
+struct kgsl_snapshot_attribute attr_##_name = { \
+	.attr = { .name = __stringify(_name), .mode = _mode }, \
+	.show = _show, \
+	.store = _store, \
+}
+
+SNAPSHOT_ATTR(trigger, 0600, NULL, trigger_store);
+SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL);
+
+static void snapshot_sysfs_release(struct kobject *kobj)
+{
+}
+
+static ssize_t snapshot_sysfs_show(struct kobject *kobj,
+	struct attribute *attr, char *buf)
+{
+	struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret;
+
+	if (device && pattr->show)
+		ret = pattr->show(device, buf);
+	else
+		ret = -EIO;
+
+	return ret;
+}
+
+static ssize_t snapshot_sysfs_store(struct kobject *kobj,
+	struct attribute *attr, const char *buf, size_t count)
+{
+	struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret;
+
+	if (device && pattr->store)
+		ret = pattr->store(device, buf, count);
+	else
+		ret = -EIO;
+
+	return ret;
+}
+
+static const struct sysfs_ops snapshot_sysfs_ops = {
+	.show = snapshot_sysfs_show,
+	.store = snapshot_sysfs_store,
+};
+
+static struct kobj_type ktype_snapshot = {
+	.sysfs_ops = &snapshot_sysfs_ops,
+	.default_attrs = NULL,
+	.release = snapshot_sysfs_release,
+};
+
+/* kgsl_device_snapshot_init - Add resources for the device GPU snapshot
+ * @device - The device to initalize
+ *
+ * Allocate memory for a GPU snapshot for the specified device,
+ * and create the sysfs files to manage it
+ */
+
+int kgsl_device_snapshot_init(struct kgsl_device *device)
+{
+	int ret;
+
+	if (device->snapshot == NULL)
+		device->snapshot = vmalloc(KGSL_SNAPSHOT_MEMSIZE);
+
+	if (device->snapshot == NULL)
+		return -ENOMEM;
+
+	device->snapshot_maxsize = KGSL_SNAPSHOT_MEMSIZE;
+	device->snapshot_timestamp = 0;
+
+	ret = kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot,
+		&device->dev->kobj, "snapshot");
+	if (ret)
+		goto done;
+
+	ret = sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj, &attr_trigger.attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj, &attr_timestamp.attr);
+
+done:
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_device_snapshot_init);
+
+/* kgsl_device_snapshot_close - Take down snapshot memory for a device
+ * @device - Pointer to the kgsl_device
+ *
+ * Remove the sysfs files and free the memory allocated for the GPU
+ * snapshot
+ */
+
+void kgsl_device_snapshot_close(struct kgsl_device *device)
+{
+	sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr);
+	sysfs_remove_file(&device->snapshot_kobj, &attr_trigger.attr);
+	sysfs_remove_file(&device->snapshot_kobj, &attr_timestamp.attr);
+
+	kobject_put(&device->snapshot_kobj);
+
+	vfree(device->snapshot);
+
+	device->snapshot = NULL;
+	device->snapshot_maxsize = 0;
+	device->snapshot_timestamp = 0;
+}
+EXPORT_SYMBOL(kgsl_device_snapshot_close);
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
new file mode 100644
index 0000000..4fdc8a1
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -0,0 +1,259 @@
+/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _KGSL_SNAPSHOT_H_
+#define _KGSL_SNAPSHOT_H_
+
+#include <linux/types.h>
+
+/* Snapshot header */
+
+#define SNAPSHOT_MAGIC 0x504D0001
+
+/* GPU ID scheme:
+ * [16:31] - core identifer (0x0002 for 2D or 0x0003 for 3D)
+ * [00:16] - GPU specific identifier
+ */
+
+struct kgsl_snapshot_header {
+	__u32 magic; /* Magic identifier */
+	__u32 gpuid; /* GPU ID - see above */
+} __packed;
+
+/* Section header */
+#define SNAPSHOT_SECTION_MAGIC 0xABCD
+
+struct kgsl_snapshot_section_header {
+	__u16 magic; /* Magic identifier */
+	__u16 id;    /* Type of section */
+	__u32 size;  /* Size of the section including this header */
+} __packed;
+
+/* Section identifiers */
+#define KGSL_SNAPSHOT_SECTION_OS           0x0101
+#define KGSL_SNAPSHOT_SECTION_REGS         0x0201
+#define KGSL_SNAPSHOT_SECTION_RB           0x0301
+#define KGSL_SNAPSHOT_SECTION_IB           0x0401
+#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501
+#define KGSL_SNAPSHOT_SECTION_ISTORE       0x0801
+#define KGSL_SNAPSHOT_SECTION_DEBUG        0x0901
+#define KGSL_SNAPSHOT_SECTION_END          0xFFFF
+
+/* OS sub-section header */
+#define KGSL_SNAPSHOT_OS_LINUX             0x0001
+
+/* Linux OS specific information */
+
+#define SNAPSHOT_STATE_HUNG 0
+#define SNAPSHOT_STATE_RUNNING 1
+
+struct kgsl_snapshot_linux {
+	int osid;                   /* subsection OS identifier */
+	int state;		    /* 1 if the thread is running, 0 for hung */
+	__u32 seconds;		    /* Unix timestamp for the snapshot */
+	__u32 power_flags;            /* Current power flags */
+	__u32 power_level;            /* Current power level */
+	__u32 power_interval_timeout; /* Power interval timeout */
+	__u32 grpclk;                 /* Current GP clock value */
+	__u32 busclk;		    /* Current busclk value */
+	__u32 ptbase;		    /* Current ptbase */
+	__u32 pid;		    /* PID of the process that owns the PT */
+	__u32 current_context;	    /* ID of the current context */
+	__u32 ctxtcount;	    /* Number of contexts appended to section */
+	unsigned char release[32];  /* kernel release */
+	unsigned char version[32];  /* kernel version */
+	unsigned char comm[16];	    /* Name of the process that owns the PT */
+} __packed;
+
+/*
+ * This structure contains a record of an active context.
+ * These are appended one after another in the OS section below
+ * the header above
+ */
+
+struct kgsl_snapshot_linux_context {
+	__u32 id;			/* The context ID */
+	__u32 timestamp_queued;		/* The last queued timestamp */
+	__u32 timestamp_retired;	/* The last timestamp retired by HW */
+};
+
+/* Ringbuffer sub-section header */
+struct kgsl_snapshot_rb {
+	int start;  /* dword at the start of the dump */
+	int end;    /* dword at the end of the dump */
+	int rbsize; /* Size (in dwords) of the ringbuffer */
+	int wptr;   /* Current index of the CPU write pointer */
+	int rptr;   /* Current index of the GPU read pointer */
+	int count;  /* Number of dwords in the dump */
+} __packed;
+
+/* Indirect buffer sub-section header */
+struct kgsl_snapshot_ib {
+	__u32 gpuaddr; /* GPU address of the the IB */
+	__u32 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	int size;    /* Size of the IB */
+} __packed;
+
+/* Register sub-section header */
+struct kgsl_snapshot_regs {
+	__u32 count; /* Number of register pairs in the section */
+} __packed;
+
+/* Indexed register sub-section header */
+struct kgsl_snapshot_indexed_regs {
+	__u32 index_reg; /* Offset of the index register for this section */
+	__u32 data_reg;  /* Offset of the data register for this section */
+	int start;     /* Starting index */
+	int count;     /* Number of dwords in the data */
+} __packed;
+
+/* Istore sub-section header */
+struct kgsl_snapshot_istore {
+	int count;   /* Number of instructions in the istore */
+} __packed;
+
+/* Debug data sub-section header */
+
+#define SNAPSHOT_DEBUG_SX         1
+#define SNAPSHOT_DEBUG_CP         2
+#define SNAPSHOT_DEBUG_SQ         3
+#define SNAPSHOT_DEBUG_SQTHREAD   4
+#define SNAPSHOT_DEBUG_MIU        5
+
+struct kgsl_snapshot_debug {
+	int type;    /* Type identifier for the attached tata */
+	int size;   /* Size of the section in bytes */
+} __packed;
+
+#ifdef __KERNEL__
+
+/* Allocate 512K for each device snapshot */
+#define KGSL_SNAPSHOT_MEMSIZE (512 * 1024)
+
+struct kgsl_device;
+/*
+ * A helper macro to print out "not enough memory functions" - this
+ * makes it easy to standardize the messages as well as cut down on
+ * the number of strings in the binary
+ */
+
+#define SNAPSHOT_ERR_NOMEM(_d, _s) \
+	KGSL_DRV_ERR((_d), \
+	"snapshot: not enough snapshot memory for section %s\n", (_s))
+
+/*
+ * kgsl_snapshot_add_section - Add a new section to the GPU snapshot
+ * @device - the KGSL device being snapshotted
+ * @id - the section id
+ * @snapshot - pointer to the memory for the snapshot
+ * @remain - pointer to the number of bytes left in the snapshot region
+ * @func - Function pointer to fill the section
+ * @priv - Priv pointer to pass to the function
+ *
+ * Set up a KGSL snapshot header by filling the memory with the callback
+ * function and adding the standard section header
+ */
+
+static inline void *kgsl_snapshot_add_section(struct kgsl_device *device,
+	u16 id, void *snapshot, int *remain,
+	int (*func)(struct kgsl_device *, void *, int, void *), void *priv)
+{
+	struct kgsl_snapshot_section_header *header = snapshot;
+	void *data = snapshot + sizeof(*header);
+	int ret = 0;
+
+	/*
+	 * Sanity check to make sure there is enough for the header.  The
+	 * callback will check to make sure there is enough for the rest
+	 * of the data.  If there isn't enough room then don't advance the
+	 * pointer.
+	 */
+
+	if (*remain < sizeof(*header))
+		return snapshot;
+
+	/* It is legal to have no function (i.e. - make an empty section) */
+
+	if (func) {
+		ret = func(device, data, *remain, priv);
+
+		/*
+		 * If there wasn't enough room for the data then don't bother
+		 * setting up the header.
+		 */
+
+		if (ret == 0)
+			return snapshot;
+	}
+
+	header->magic = SNAPSHOT_SECTION_MAGIC;
+	header->id = id;
+	header->size = ret + sizeof(*header);
+
+	/* Decrement the room left in the snapshot region */
+	*remain -= header->size;
+	/* Advance the pointer to the end of the next function */
+	return snapshot + header->size;
+}
+
+/* A common helper function to dump a range of registers.  This will be used in
+ * the GPU specific devices like this:
+ *
+ * struct kgsl_snapshot_registers priv;
+ * priv.regs = registers_array;;
+ * priv.count = num_registers;
+ *
+ * kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+ *	remain, kgsl_snapshot_dump_regs, &priv).
+ *
+ * Pass in an array of register range pairs in the form of:
+ * start reg, stop reg
+ * All the registers between start and stop inclusive will be dumped
+ */
+
+struct kgsl_snapshot_registers {
+	unsigned int *regs;  /* Pointer to the array of register ranges */
+	int count;	     /* Number of entries in the array */
+};
+
+int kgsl_snapshot_dump_regs(struct kgsl_device *device, void *snapshot,
+	int remain, void *priv);
+
+/*
+ * A common helper function to dump a set of indexed registers. Use it
+ * like this:
+ *
+ * struct kgsl_snapshot_indexed_registers priv;
+ * priv.index = REG_INDEX;
+ * priv.data = REG_DATA;
+ * priv.count = num_registers
+ *
+ * kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS,
+ *	snapshot, remain, kgsl_snapshot_dump_indexed_regs, &priv).
+ *
+ * The callback function will write an index from 0 to priv.count to
+ * the index register and read the data from the data register.
+ */
+
+struct kgsl_snapshot_indexed_registers {
+	unsigned int index; /* Offset of the index register */
+	unsigned int data;  /* Offset of the data register */
+	unsigned int start;	/* Index to start with */
+	unsigned int count; /* Number of values to read from the pair */
+};
+
+int kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device,
+	void *snapshot, int remain, void *priv);
+
+#endif
+#endif