msm: kgsl: Fix context reference counting

Get rid of kgsl_find_context. Use instead kgsl_context_get that does
correct RCU read locking around the itr_find and increases the
reference count on the context before returning it.  This eliminates
the chance that a context will be destroyed while somebody is still
using it.  Of course increased use of kgsl_context_get is accompanied
by kgsl_context_put in all the right places.

Change-Id: Ic0dedbad73d497fd9b451aefad8e5b28d33b829d
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 9f4bb88..f0ce8f9 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -315,14 +315,15 @@
 		return kgsl_mmu_device_setstate(&device->mmu, flags);
 	num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
 
-	context = idr_find(&device->context_idr, context_id);
+	context = kgsl_context_get(device, context_id);
+
 	if (context == NULL)
 		return;
 	adreno_ctx = context->devctxt;
 
 	if (kgsl_mmu_enable_clk(&device->mmu,
 				KGSL_IOMMU_CONTEXT_USER))
-		return;
+		goto done;
 
 	cmds += __adreno_add_idle_indirect_cmds(cmds,
 		device->mmu.setstate_memory.gpuaddr +
@@ -440,6 +441,8 @@
 		KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
 		BUG();
 	}
+done:
+	kgsl_context_put(context);
 }
 
 static void adreno_gpummu_setstate(struct kgsl_device *device,
@@ -467,7 +470,7 @@
 	 * easier to filter out the mmu accesses from the dump
 	 */
 	if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) {
-		context = idr_find(&device->context_idr, context_id);
+		context = kgsl_context_get(device, context_id);
 		if (context == NULL)
 			return;
 		adreno_ctx = context->devctxt;
@@ -547,6 +550,8 @@
 		adreno_ringbuffer_issuecmds(device, adreno_ctx,
 					KGSL_CMD_FLAGS_PMODE,
 					&link[0], sizedwords);
+
+		kgsl_context_put(context);
 	} else {
 		kgsl_mmu_device_setstate(&device->mmu, flags);
 	}
@@ -1360,6 +1365,8 @@
 	 * since thats the guilty party, if fault tolerance failed then
 	 * mark all as guilty
 	 */
+
+	rcu_read_lock();
 	while ((context = idr_get_next(&device->context_idr, &next))) {
 		struct adreno_context *adreno_context = context->devctxt;
 		if (ft_status) {
@@ -1378,6 +1385,7 @@
 		}
 		next = next + 1;
 	}
+	rcu_read_unlock();
 }
 
 static void adreno_set_max_ts_for_bad_ctxs(struct kgsl_device *device)
@@ -1388,6 +1396,7 @@
 	struct adreno_context *temp_adreno_context;
 	int next = 0;
 
+	rcu_read_lock();
 	while ((context = idr_get_next(&device->context_idr, &next))) {
 		temp_adreno_context = context->devctxt;
 		if (temp_adreno_context->flags & CTXT_FLAGS_GPU_HANG) {
@@ -1402,6 +1411,7 @@
 		}
 		next = next + 1;
 	}
+	rcu_read_unlock();
 }
 
 static void adreno_destroy_ft_data(struct adreno_ft_data *ft_data)
@@ -1819,7 +1829,8 @@
 	struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
 	unsigned int long_ib = 0;
 
-	context = idr_find(&device->context_idr, ft_data->context_id);
+	context = kgsl_context_get(device, ft_data->context_id);
+
 	if (context == NULL) {
 		KGSL_FT_ERR(device, "Last context unknown id:%d\n",
 			ft_data->context_id);
@@ -1996,17 +2007,21 @@
 	/* ringbuffer now has data from the last valid context id,
 	 * so restore the active_ctx to the last valid context */
 	if (ft_data->last_valid_ctx_id) {
-		struct kgsl_context *last_ctx =
-				idr_find(&device->context_idr,
-				ft_data->last_valid_ctx_id);
+		struct kgsl_context *last_ctx = kgsl_context_get(device,
+			ft_data->last_valid_ctx_id);
+
 		if (last_ctx)
 			adreno_dev->drawctxt_active = last_ctx->devctxt;
+
+		kgsl_context_put(last_ctx);
 	}
 
 done:
 	/* Turn off iommu clocks */
 	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
 		kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
+
+	kgsl_context_put(context);
 	return ret;
 }
 
@@ -2463,7 +2478,9 @@
 	struct kgsl_context *context;
 	struct adreno_context *adreno_context = NULL;
 	int next = 0;
+	struct kgsl_memdesc *desc = NULL;
 
+	rcu_read_lock();
 	while (1) {
 		context = idr_get_next(&device->context_idr, &next);
 		if (context == NULL)
@@ -2473,20 +2490,19 @@
 
 		if (kgsl_mmu_pt_equal(&device->mmu, adreno_context->pagetable,
 					pt_base)) {
-			struct kgsl_memdesc *desc;
-
 			desc = &adreno_context->gpustate;
 			if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size))
-				return desc;
+				break;
 
 			desc = &adreno_context->context_gmem_shadow.gmemshadow;
 			if (kgsl_gpuaddr_in_memdesc(desc, gpuaddr, size))
-				return desc;
+				break;
 		}
 		next = next + 1;
+		desc = NULL;
 	}
-
-	return NULL;
+	rcu_read_unlock();
+	return desc;
 }
 
 struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
@@ -3134,7 +3150,8 @@
 	case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET:
 		binbase = data;
 
-		context = kgsl_find_context(dev_priv, binbase->drawctxt_id);
+		context = kgsl_context_get_owner(dev_priv,
+			binbase->drawctxt_id);
 		if (context) {
 			adreno_drawctxt_set_bin_base_offset(
 				dev_priv->device, context, binbase->offset);
@@ -3145,6 +3162,8 @@
 				"device_id=%d\n",
 				binbase->drawctxt_id, dev_priv->device->id);
 		}
+
+		kgsl_context_put(context);
 		break;
 
 	default:
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 5fdcf19..ee82bd6 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -740,7 +740,9 @@
 			(unsigned int *) &context_id,
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 				current_context));
-	context = idr_find(&device->context_idr, context_id);
+
+	context = kgsl_context_get(device, context_id);
+
 	if (context) {
 		ts_processed = kgsl_readtimestamp(device, context,
 						  KGSL_TIMESTAMP_RETIRED);
@@ -749,6 +751,8 @@
 	} else
 		KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id);
 
+	kgsl_context_put(context);
+
 	num_item = adreno_ringbuffer_count(&adreno_dev->ringbuffer,
 						cp_rb_rptr);
 	if (num_item <= 0)
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 405f9ac..9d50e6b 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -1178,11 +1178,13 @@
 	if (0xFFFFFFFF == ft_data->start_of_replay_cmds)
 		return;
 
-	k_ctxt = idr_find(&device->context_idr, ft_data->context_id);
+	k_ctxt = kgsl_context_get(device, ft_data->context_id);
+
 	if (k_ctxt) {
 		a_ctxt = k_ctxt->devctxt;
 		if (a_ctxt->flags & CTXT_FLAGS_PREAMBLE)
 			_turn_preamble_on_for_ib_seq(rb, rb_rptr);
+		kgsl_context_put(k_ctxt);
 	}
 	k_ctxt = NULL;
 
@@ -1213,7 +1215,8 @@
 			/* if context switches to a context that did not cause
 			 * hang then start saving the rb contents as those
 			 * commands can be executed */
-			k_ctxt = idr_find(&rb->device->context_idr, val2);
+			k_ctxt = kgsl_context_get(rb->device, val2);
+
 			if (k_ctxt) {
 				a_ctxt = k_ctxt->devctxt;
 
@@ -1251,6 +1254,7 @@
 				copy_rb_contents = 0;
 			}
 			}
+			kgsl_context_put(k_ctxt);
 		}
 
 		if (copy_rb_contents)
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 602d47b..a218c08 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -972,7 +972,7 @@
 			result = -EFAULT;
 			break;
 		}
-		context = kgsl_find_context(dev_priv, id);
+		context = kgsl_context_get_owner(dev_priv, id);
 		if (!context) {
 			result = -EINVAL;
 			break;
@@ -982,12 +982,14 @@
 		 * the out parameter
 		 */
 		if (copy_to_user(param->value, &(context->reset_status),
-			sizeof(unsigned int))) {
+			sizeof(unsigned int)))
 			result = -EFAULT;
-			break;
+		else {
+			/* Clear reset status once its been queried */
+			context->reset_status = KGSL_CTX_STAT_NO_ERROR;
 		}
-		/* Clear reset status once its been queried */
-		context->reset_status = KGSL_CTX_STAT_NO_ERROR;
+
+		kgsl_context_put(context);
 		break;
 	}
 	default:
@@ -1063,19 +1065,14 @@
 {
 	struct kgsl_device_waittimestamp_ctxtid *param = data;
 	struct kgsl_context *context;
-	int result;
+	long result = -EINVAL;
 
-	context = kgsl_find_context(dev_priv, param->context_id);
-	if (context == NULL)
-		return -EINVAL;
-	/*
-	 * A reference count is needed here, because waittimestamp may
-	 * block with the device mutex unlocked and userspace could
-	 * request for the context to be destroyed during that time.
-	 */
-	kgsl_context_get(context);
-	result = _device_waittimestamp(dev_priv, context,
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+
+	if (context)
+		result = _device_waittimestamp(dev_priv, context,
 			param->timestamp, param->timeout);
+
 	kgsl_context_put(context);
 	return result;
 }
@@ -1088,7 +1085,7 @@
 	struct kgsl_ibdesc *ibdesc;
 	struct kgsl_context *context;
 
-	context = kgsl_find_context(dev_priv, param->drawctxt_id);
+	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
 	if (context == NULL) {
 		result = -EINVAL;
 		goto done;
@@ -1164,7 +1161,7 @@
 free_ibdesc:
 	kfree(ibdesc);
 done:
-
+	kgsl_context_put(context);
 	return result;
 }
 
@@ -1197,14 +1194,16 @@
 {
 	struct kgsl_cmdstream_readtimestamp_ctxtid *param = data;
 	struct kgsl_context *context;
+	long result = -EINVAL;
 
-	context = kgsl_find_context(dev_priv, param->context_id);
-	if (context == NULL)
-		return -EINVAL;
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
 
-
-	return _cmdstream_readtimestamp(dev_priv, context,
+	if (context)
+		result = _cmdstream_readtimestamp(dev_priv, context,
 			param->type, &param->timestamp);
+
+	kgsl_context_put(context);
+	return result;
 }
 
 static void kgsl_freemem_event_cb(struct kgsl_device *device,
@@ -1261,16 +1260,14 @@
 {
 	struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data;
 	struct kgsl_context *context;
+	long result = -EINVAL;
 
-	context = kgsl_find_context(dev_priv, param->context_id);
-	if (context == NULL) {
-		KGSL_DRV_ERR(dev_priv->device,
-			"invalid drawctxt context_id %d\n", param->context_id);
-		return -EINVAL;
-	}
-
-	return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context)
+		result = _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
 			context, param->timestamp, param->type);
+	kgsl_context_put(context);
+	return result;
 }
 
 static long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv,
@@ -1306,19 +1303,18 @@
 static long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv,
 					unsigned int cmd, void *data)
 {
-	int result = 0;
 	struct kgsl_drawctxt_destroy *param = data;
 	struct kgsl_context *context;
+	long result = -EINVAL;
 
-	context = kgsl_find_context(dev_priv, param->drawctxt_id);
+	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
 
-	if (context == NULL) {
-		result = -EINVAL;
-		goto done;
+	if (context) {
+		kgsl_context_detach(context);
+		result = 0;
 	}
 
-	kgsl_context_detach(context);
-done:
+	kgsl_context_put(context);
 	return result;
 }
 
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 8a10004..f9fc40c 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -377,17 +377,7 @@
 	return 0;
 }
 
-static inline struct kgsl_context *
-kgsl_find_context(struct kgsl_device_private *dev_priv, uint32_t id)
-{
-	struct kgsl_context *ctxt =
-		idr_find(&dev_priv->device->context_idr, id);
 
-	/* Make sure that the context belongs to the current instance so
-	   that other processes can't guess context IDs and mess things up */
-
-	return  (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL;
-}
 
 int kgsl_check_timestamp(struct kgsl_device *device,
 		struct kgsl_context *context, unsigned int timestamp);
@@ -411,21 +401,6 @@
 	return pdev->dev.platform_data;
 }
 
-/**
- * kgsl_context_get - Get context reference count
- * @context
- *
- * Asynchronous code that holds a pointer to a context
- * must hold a reference count on it. The kgsl device
- * mutex must be held while the context reference count
- * is changed.
- */
-static inline void
-kgsl_context_get(struct kgsl_context *context)
-{
-	kref_get(&context->refcount);
-}
-
 void kgsl_context_destroy(struct kref *kref);
 
 /**
@@ -436,7 +411,74 @@
 static inline void
 kgsl_context_put(struct kgsl_context *context)
 {
-	kref_put(&context->refcount, kgsl_context_destroy);
+	if (context)
+		kref_put(&context->refcount, kgsl_context_destroy);
+}
+
+/**
+ * _kgsl_context_get() - lightweight function to just increment the ref count
+ * @context: Pointer to the KGSL context
+ *
+ * Get a reference to the specified KGSL context structure. This is a
+ * lightweight way to just increase the refcount on a known context rather then
+ * walking through kgsl_context_get and searching the iterator
+ */
+static inline void _kgsl_context_get(struct kgsl_context *context)
+{
+	if (context)
+		kref_get(&context->refcount);
+}
+
+/**
+ * kgsl_context_get - get a pointer to a KGSL context
+ * @devicex - Pointer to the KGSL device that owns the context
+ * @id - Context ID to return
+ *
+ * Find the context associated with the given ID number, increase the reference
+ * count on it and return it.  The caller must make sure that this call is
+ * paired with a kgsl_context_put.  This function is for internal use because it
+ * doesn't validate the ownership of the context with the calling process - use
+ * kgsl_context_get_owner for that
+ */
+static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device,
+		uint32_t id)
+{
+	struct kgsl_context *context = NULL;
+
+	rcu_read_lock();
+	context = idr_find(&device->context_idr, id);
+
+	_kgsl_context_get(context);
+
+	rcu_read_unlock();
+	return context;
+}
+
+/**
+ * kgsl_context_get_owner - get a pointer to a KGSL context
+ * @dev_priv - Pointer to the owner of the requesting process
+ * @id - Context ID to return
+ *
+ * Find the context associated with the given ID number, increase the reference
+ * count on it and return it.  The caller must make sure that this call is
+ * paired with a kgsl_context_put. This function validates that the context id
+ * given is owned by the dev_priv instancet that is passed in.  see
+ * kgsl_context_get for the internal version that doesn't do the check
+ */
+static inline struct kgsl_context *kgsl_context_get_owner(
+		struct kgsl_device_private *dev_priv, uint32_t id)
+{
+	struct kgsl_context *context;
+
+	context = kgsl_context_get(dev_priv->device, id);
+
+	/* Verify that the context belongs to the dev_priv instance */
+	if (context && context->dev_priv != dev_priv) {
+		kgsl_context_put(context);
+		return NULL;
+	}
+
+	return context;
 }
 
 /**
diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c
index b1b11cc..c5b7e91 100644
--- a/drivers/gpu/msm/kgsl_events.c
+++ b/drivers/gpu/msm/kgsl_events.c
@@ -59,7 +59,7 @@
 		return -EINVAL;
 
 	if (id != KGSL_MEMSTORE_GLOBAL) {
-		context = idr_find(&device->context_idr, id);
+		context = kgsl_context_get(device, id);
 		if (context == NULL)
 			return -EINVAL;
 	}
@@ -75,12 +75,15 @@
 	if (timestamp_cmp(cur_ts, ts) >= 0) {
 		trace_kgsl_fire_event(id, ts, 0);
 		cb(device, priv, id, ts);
+		kgsl_context_put(context);
 		return 0;
 	}
 
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
-	if (event == NULL)
+	if (event == NULL) {
+		kgsl_context_put(context);
 		return -ENOMEM;
+	}
 
 	event->context = context;
 	event->timestamp = ts;
@@ -91,10 +94,6 @@
 
 	trace_kgsl_register_event(id, ts);
 
-	/* inc refcount to avoid race conditions in cleanup */
-	if (context)
-		kgsl_context_get(context);
-
 	/* Add the event to either the owning context or the global list */
 
 	if (context) {
@@ -143,7 +142,7 @@
 	 * Increment the refcount to avoid freeing the context while
 	 * cancelling its events
 	 */
-	kgsl_context_get(context);
+	_kgsl_context_get(context);
 
 	/* Remove ourselves from the master pending list */
 	list_del_init(&context->events_list);
@@ -320,7 +319,7 @@
 		 * Increment the refcount to make sure that the list_del_init
 		 * is called with a valid context's list
 		 */
-		kgsl_context_get(context);
+		_kgsl_context_get(context);
 		/*
 		 * If kgsl_timestamp_expired_context returns 0 then it no longer
 		 * has any pending events and can be removed from the list
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 52d7154..452d8a3 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1261,13 +1261,14 @@
 			(unsigned int *) &context_id,
 			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
 				current_context));
-		context = idr_find(&device->context_idr, context_id);
+		context = kgsl_context_get(device, context_id);
 		if (context)
 			ts_processed = kgsl_readtimestamp(device, context,
 				KGSL_TIMESTAMP_RETIRED);
 		KGSL_PWR_INFO(device, "Wake from %s state. CTXT: %d RTRD TS: %08X\n",
 			kgsl_pwrstate_to_str(state),
 			context ? context->id : -1, ts_processed);
+		kgsl_context_put(context);
 		/* fall through */
 	case KGSL_STATE_NAP:
 		/* Turn on the core clocks */
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index a5aa42f..e200294 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -139,7 +139,9 @@
 	/* Figure out how many active contexts there are - these will
 	 * be appended on the end of the structure */
 
+	rcu_read_lock();
 	idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
+	rcu_read_unlock();
 
 	size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context);
 
@@ -187,8 +189,9 @@
 
 	/* append information for each context */
 	_ctxtptr = snapshot + sizeof(*header);
+	rcu_read_lock();
 	idr_for_each(&device->context_idr, snapshot_context_info, NULL);
-
+	rcu_read_unlock();
 	/* Return the size of the data segment */
 	return size;
 }
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
index 813305a..5604e76 100644
--- a/drivers/gpu/msm/kgsl_sync.c
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -118,16 +118,19 @@
 	if (len != sizeof(priv))
 		return -EINVAL;
 
-	context = kgsl_find_context(owner, context_id);
-	if (context == NULL)
-		return -EINVAL;
-
 	event = kzalloc(sizeof(*event), GFP_KERNEL);
 	if (event == NULL)
 		return -ENOMEM;
+
+	context = kgsl_context_get_owner(owner, context_id);
+
+	if (context == NULL) {
+		kfree(event);
+		return -EINVAL;
+	}
+
 	event->context = context;
 	event->timestamp = timestamp;
-	kgsl_context_get(context);
 
 	pt = kgsl_sync_pt_create(context->timeline, timestamp);
 	if (pt == NULL) {
@@ -158,6 +161,10 @@
 		goto fail_copy_fd;
 	}
 
+	/*
+	 * Hold the context ref-count for the event - it will get released in
+	 * the callback
+	 */
 	ret = kgsl_add_event(device, context_id, timestamp,
 			kgsl_fence_event_cb, event, owner);
 	if (ret)