msm: kgsl: disable use of iommu TTBR1

The v1 iommu only supports splitting between TTBR0 and TTBR1
on a power of two boundary. Cutting off the userspace address
at 2G (0x80000000) is inconvienient, as the GPU userspace
address space should align with the CPU address space.

This requires changing how global allocations are managed,
since there is no longer a separate pagetable for TTBR1.
The default pagetable is still the master of these allocations
and maintains the gen_pool for allocating global addresses.
But now, these regions are mapped into each process pagetable
by calling kgsl_setup_pt(). This requires kgsl_mmu_map
and kgsl_mmu_unmap to be able to handle mapping without
virtual address allocation.

Change-Id: I94e2d63dc7e6a7ef576f993770725b6b7ba14228
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index f11511f..25edc16 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -37,17 +37,18 @@
 static int kgsl_cleanup_pt(struct kgsl_pagetable *pt)
 {
 	int i;
-	/* For IOMMU only unmap the global structures to global pt */
-	if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
-		(KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
-		(KGSL_MMU_GLOBAL_PT !=  pt->name) &&
-		(KGSL_MMU_PRIV_BANK_TABLE_NAME !=  pt->name))
-		return 0;
+	struct kgsl_device *device;
+
 	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
-		struct kgsl_device *device = kgsl_driver.devp[i];
+		device = kgsl_driver.devp[i];
 		if (device)
 			device->ftbl->cleanup_pt(device, pt);
 	}
+	/* Only the 3d device needs mmu specific pt entries */
+	device = kgsl_driver.devp[KGSL_DEVICE_3D0];
+	if (device->mmu.mmu_ops->mmu_cleanup_pt != NULL)
+		device->mmu.mmu_ops->mmu_cleanup_pt(&device->mmu, pt);
+
 	return 0;
 }
 
@@ -56,21 +57,23 @@
 {
 	int i = 0;
 	int status = 0;
+	struct kgsl_device *device;
 
-	/* For IOMMU only map the global structures to global pt */
-	if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
-		(KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
-		(KGSL_MMU_GLOBAL_PT !=  pt->name) &&
-		(KGSL_MMU_PRIV_BANK_TABLE_NAME !=  pt->name))
-		return 0;
 	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
-		struct kgsl_device *device = kgsl_driver.devp[i];
+		device = kgsl_driver.devp[i];
 		if (device) {
 			status = device->ftbl->setup_pt(device, pt);
 			if (status)
 				goto error_pt;
 		}
 	}
+	/* Only the 3d device needs mmu specific pt entries */
+	device = kgsl_driver.devp[KGSL_DEVICE_3D0];
+	if (device->mmu.mmu_ops->mmu_setup_pt != NULL) {
+		status = device->mmu.mmu_ops->mmu_setup_pt(&device->mmu, pt);
+		if (status)
+			goto error_pt;
+	}
 	return status;
 error_pt:
 	while (i >= 0) {
@@ -310,22 +313,6 @@
 	return ret;
 }
 
-unsigned int kgsl_mmu_get_ptsize(void)
-{
-	/*
-	 * For IOMMU, we could do up to 4G virtual range if we wanted to, but
-	 * it makes more sense to return a smaller range and leave the rest of
-	 * the virtual range for future improvements
-	 */
-
-	if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type)
-		return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
-	else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type)
-		return SZ_2G - KGSL_PAGETABLE_BASE;
-	else
-		return 0;
-}
-
 int
 kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, unsigned int pt_base)
 {
@@ -511,7 +498,7 @@
 		goto err_kgsl_pool;
 	}
 
-	if (gen_pool_add(pagetable->pool, KGSL_PAGETABLE_BASE,
+	if (gen_pool_add(pagetable->pool, kgsl_mmu_get_base_addr(),
 				ptsize, -1)) {
 		KGSL_CORE_ERR("gen_pool_add failed\n");
 		goto err_pool;
@@ -559,11 +546,7 @@
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
 		return (void *)(-1);
 
-#ifndef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
-	name = KGSL_MMU_GLOBAL_PT;
-#endif
-	/* We presently do not support per-process for IOMMU-v2 */
-	if (!msm_soc_version_supports_iommu_v1())
+	if (!kgsl_mmu_is_perprocess())
 		name = KGSL_MMU_GLOBAL_PT;
 
 	pt = kgsl_get_pagetable(name);
@@ -626,15 +609,6 @@
 	 */
 }
 
-static inline struct gen_pool *
-_get_pool(struct kgsl_pagetable *pagetable, unsigned int flags)
-{
-	if (pagetable->kgsl_pool &&
-		(KGSL_MEMDESC_GLOBAL & flags))
-		return pagetable->kgsl_pool;
-	return pagetable->pool;
-}
-
 int
 kgsl_mmu_map(struct kgsl_pagetable *pagetable,
 				struct kgsl_memdesc *memdesc,
@@ -665,28 +639,48 @@
 
 	size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
 
-	/* Allocate from kgsl pool if it exists for global mappings */
-	pool = _get_pool(pagetable, memdesc->priv);
+	pool = pagetable->pool;
 
-	/* Allocate aligned virtual addresses for iommu. This allows
-	 * more efficient pagetable entries if the physical memory
-	 * is also aligned. Don't do this for GPUMMU, because
-	 * the address space is so small.
-	 */
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype() &&
-	    kgsl_memdesc_get_align(memdesc) > 0)
-		page_align = kgsl_memdesc_get_align(memdesc);
-
-	memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size, page_align);
-	if (memdesc->gpuaddr == 0) {
-		KGSL_CORE_ERR("gen_pool_alloc(%d) failed from pool: %s\n",
-			size,
-			(pool == pagetable->kgsl_pool) ?
-			"kgsl_pool" : "general_pool");
-		KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
-				pagetable->name, pagetable->stats.mapped,
-				pagetable->stats.entries);
-		return -ENOMEM;
+	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+		/* Allocate aligned virtual addresses for iommu. This allows
+		 * more efficient pagetable entries if the physical memory
+		 * is also aligned. Don't do this for GPUMMU, because
+		 * the address space is so small.
+		 */
+		if (kgsl_memdesc_get_align(memdesc) > 0)
+			page_align = kgsl_memdesc_get_align(memdesc);
+		if (kgsl_memdesc_is_global(memdesc)) {
+			/*
+			 * Only the default pagetable has a kgsl_pool, and
+			 * it is responsible for creating the mapping for
+			 * each global buffer. The mapping will be reused
+			 * in all other pagetables and it must already exist
+			 * when we're creating other pagetables which do not
+			 * have a kgsl_pool.
+			 */
+			pool = pagetable->kgsl_pool;
+			if (pool == NULL && memdesc->gpuaddr == 0) {
+				KGSL_CORE_ERR(
+				  "No address for global mapping into pt %d\n",
+				  pagetable->name);
+				return -EINVAL;
+			}
+		}
+	}
+	if (pool) {
+		memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size,
+							  page_align);
+		if (memdesc->gpuaddr == 0) {
+			KGSL_CORE_ERR("gen_pool_alloc(%d) failed, pool: %s\n",
+					size,
+					(pool == pagetable->kgsl_pool) ?
+					"kgsl_pool" : "general_pool");
+			KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
+					pagetable->name,
+					pagetable->stats.mapped,
+					pagetable->stats.entries);
+			return -ENOMEM;
+		}
 	}
 
 	if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
@@ -713,7 +707,8 @@
 
 err_free_gpuaddr:
 	spin_unlock(&pagetable->lock);
-	gen_pool_free(pool, memdesc->gpuaddr, size);
+	if (pool)
+		gen_pool_free(pool, memdesc->gpuaddr, size);
 	memdesc->gpuaddr = 0;
 	return ret;
 }
@@ -759,14 +754,20 @@
 
 	spin_unlock(&pagetable->lock);
 
-	pool = _get_pool(pagetable, memdesc->priv);
-	gen_pool_free(pool, memdesc->gpuaddr, size);
+	pool = pagetable->pool;
+
+	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()
+		&& kgsl_memdesc_is_global(memdesc)) {
+		pool = pagetable->kgsl_pool;
+	}
+	if (pool)
+		gen_pool_free(pool, memdesc->gpuaddr, size);
 
 	/*
 	 * Don't clear the gpuaddr on global mappings because they
 	 * may be in use by other pagetables
 	 */
-	if (!(memdesc->priv & KGSL_MEMDESC_GLOBAL))
+	if (!kgsl_memdesc_is_global(memdesc))
 		memdesc->gpuaddr = 0;
 	return 0;
 }