msm: kgsl: reference count struct kgsl_context

Per-context timestamps introduced a race condition
between the context destroy ioctl and the waittimestamp
ioctl. The waittimestamp ioctl release device->mutex
while it is waiting to prevent deadlock. It also has
a context pointer, which could be freed from a different
thread while the waiting thread was blocked.

Fix this by adding a reference count to the context
structure, which must be held by any code that maintains
a pointer to a context while the device mutex is not
held. Currently this only happens via waittimestamp.

The "main" reference count removed by userspace
requesting the context to be destroyed. When this
happens kgsl_context_detach() is called, which does
a partial cleanup of the context so that it can
no longer be used to issue commands. Once a context
has been detached, its id field is set to
KGSL_CONTEXT_INVALID. Unfortunately this is needed
by adreno_waittimestamp() so it can correctly stop
waiting in this case. Cleaning up adreno_waittimestamp()
will need to be handled in a separate patch.

CRs-Fixed: 355155
Change-Id: Ib934b467cd077b5ee774de5f297660e418d693e5
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 556cb3f..1432ccf 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1141,16 +1141,12 @@
 static unsigned int _get_context_id(struct kgsl_context *k_ctxt)
 {
 	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
-
 	if (k_ctxt != NULL) {
 		struct adreno_context *a_ctxt = k_ctxt->devctxt;
-		/*
-		 * if the context was not created with per context timestamp
-		 * support, we must use the global timestamp since issueibcmds
-		 * will be returning that one.
-		 */
-		if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
-			context_id = a_ctxt->id;
+		if (k_ctxt->id == KGSL_CONTEXT_INVALID || a_ctxt == NULL)
+			context_id = KGSL_CONTEXT_INVALID;
+		else if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
+			context_id = k_ctxt->id;
 	}
 
 	return context_id;
@@ -1161,11 +1157,22 @@
 {
 	int status;
 	unsigned int ref_ts, enableflag;
-	unsigned int context_id = _get_context_id(context);
+	unsigned int context_id;
+
+	mutex_lock(&device->mutex);
+	context_id = _get_context_id(context);
+	/*
+	 * If the context ID is invalid, we are in a race with
+	 * the context being destroyed by userspace so bail.
+	 */
+	if (context_id == KGSL_CONTEXT_INVALID) {
+		KGSL_DRV_WARN(device, "context was detached");
+		status = -EINVAL;
+		goto unlock;
+	}
 
 	status = kgsl_check_timestamp(device, context, timestamp);
 	if (!status) {
-		mutex_lock(&device->mutex);
 		kgsl_sharedmem_readl(&device->memstore, &enableflag,
 			KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable));
 		mb();
@@ -1199,8 +1206,9 @@
 			adreno_ringbuffer_issuecmds(device, KGSL_CMD_FLAGS_NONE,
 				&cmds[0], 2);
 		}
-		mutex_unlock(&device->mutex);
 	}
+unlock:
+	mutex_unlock(&device->mutex);
 
 	return status;
 }
@@ -1259,6 +1267,15 @@
 	msecs_first = (msecs <= 100) ? ((msecs + 4) / 5) : 100;
 	msecs_part = (msecs - msecs_first + 3) / 4;
 	for (retries = 0; retries < 5; retries++) {
+		/*
+		 * If the context ID is invalid, we are in a race with
+		 * the context being destroyed by userspace so bail.
+		 */
+		if (context_id == KGSL_CONTEXT_INVALID) {
+			KGSL_DRV_WARN(device, "context was detached");
+			status = -EINVAL;
+			goto done;
+		}
 		if (kgsl_check_timestamp(device, context, timestamp)) {
 			/* if the timestamp happens while we're not
 			 * waiting, there's a chance that an interrupt
@@ -1320,6 +1337,14 @@
 	unsigned int timestamp = 0;
 	unsigned int context_id = _get_context_id(context);
 
+	/*
+	 * If the context ID is invalid, we are in a race with
+	 * the context being destroyed by userspace so bail.
+	 */
+	if (context_id == KGSL_CONTEXT_INVALID) {
+		KGSL_DRV_WARN(device, "context was detached");
+		return timestamp;
+	}
 	switch (type) {
 	case KGSL_TIMESTAMP_QUEUED: {
 		struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index adf2772..6980f55 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -363,28 +363,48 @@
 		return NULL;
 	}
 
+	kref_init(&context->refcount);
 	context->id = id;
 	context->dev_priv = dev_priv;
 
 	return context;
 }
 
-static void
-kgsl_destroy_context(struct kgsl_device_private *dev_priv,
-		     struct kgsl_context *context)
+/**
+ * kgsl_context_detach - Release the "master" context reference
+ * @context - The context that will be detached
+ *
+ * This is called when a context becomes unusable, because userspace
+ * has requested for it to be destroyed. The context itself may
+ * exist a bit longer until its reference count goes to zero.
+ * Other code referencing the context can detect that it has been
+ * detached because the context id will be set to KGSL_CONTEXT_INVALID.
+ */
+void
+kgsl_context_detach(struct kgsl_context *context)
 {
 	int id;
-
+	struct kgsl_device *device;
 	if (context == NULL)
 		return;
-
-	/* Fire a bug if the devctxt hasn't been freed */
-	BUG_ON(context->devctxt);
-
+	device = context->dev_priv->device;
+	kgsl_cancel_events_ctxt(device, context);
 	id = context->id;
-	kfree(context);
+	if (device->ftbl->drawctxt_destroy)
+		device->ftbl->drawctxt_destroy(device, context);
+	/*device specific drawctxt_destroy MUST clean up devctxt */
+	BUG_ON(context->devctxt);
+	idr_remove(&device->context_idr, id);
+	context->id = KGSL_CONTEXT_INVALID;
+	kgsl_context_put(context);
+}
 
-	idr_remove(&dev_priv->device->context_idr, id);
+void
+kgsl_context_destroy(struct kref *kref)
+{
+	struct kgsl_context *context = container_of(kref, struct kgsl_context,
+						    refcount);
+	kfree(context);
 }
 
 void kgsl_timestamp_expired(struct work_struct *work)
@@ -768,10 +788,8 @@
 		if (context == NULL)
 			break;
 
-		if (context->dev_priv == dev_priv) {
-			device->ftbl->drawctxt_destroy(device, context);
-			kgsl_destroy_context(dev_priv, context);
-		}
+		if (context->dev_priv == dev_priv)
+			kgsl_context_detach(context);
 
 		next = next + 1;
 	}
@@ -1034,6 +1052,7 @@
 {
 	struct kgsl_device_waittimestamp_ctxtid *param = data;
 	struct kgsl_context *context;
+	int result;
 
 	context = kgsl_find_context(dev_priv, param->context_id);
 	if (context == NULL) {
@@ -1041,9 +1060,16 @@
 			param->context_id);
 		return -EINVAL;
 	}
-
-	return _device_waittimestamp(dev_priv, context,
+	/*
+	 * A reference count is needed here, because waittimestamp may
+	 * block with the device mutex unlocked and userspace could
+	 * request for the context to be destroyed during that time.
+	 */
+	kgsl_context_get(context);
+	result = _device_waittimestamp(dev_priv, context,
 			param->timestamp, param->timeout);
+	kgsl_context_put(context);
+	return result;
 }
 
 static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
@@ -1261,7 +1287,7 @@
 	param->drawctxt_id = context->id;
 done:
 	if (result && context)
-		kgsl_destroy_context(dev_priv, context);
+		kgsl_context_detach(context);
 
 	return result;
 }
@@ -1280,14 +1306,7 @@
 		goto done;
 	}
 
-	kgsl_cancel_events_ctxt(dev_priv->device, context);
-
-	if (dev_priv->device->ftbl->drawctxt_destroy)
-		dev_priv->device->ftbl->drawctxt_destroy(dev_priv->device,
-			context);
-
-	kgsl_destroy_context(dev_priv, context);
-
+	kgsl_context_detach(context);
 done:
 	return result;
 }
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 0964458..5b6522a 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -220,6 +220,7 @@
 	.last_expired_ctxt_id = KGSL_CONTEXT_INVALID
 
 struct kgsl_context {
+	struct kref refcount;
 	uint32_t id;
 
 	/* Pointer to the owning device instance */
@@ -380,4 +381,32 @@
 	return pdev->dev.platform_data;
 }
 
+/**
+ * kgsl_context_get - Get context reference count
+ * @context
+ *
+ * Asynchronous code that holds a pointer to a context
+ * must hold a reference count on it. The kgsl device
+ * mutex must be held while the context reference count
+ * is changed.
+ */
+static inline void
+kgsl_context_get(struct kgsl_context *context)
+{
+	kref_get(&context->refcount);
+}
+
+void kgsl_context_destroy(struct kref *kref);
+
+/**
+ * kgsl_context_put - Release context reference count
+ * @context
+ *
+ */
+static inline void
+kgsl_context_put(struct kgsl_context *context)
+{
+	kref_put(&context->refcount, kgsl_context_destroy);
+}
+
 #endif  /* __KGSL_DEVICE_H */