msm: kgsl: allow consistent CPU and GPU mappings

KGSL_GPUMEM_ALLOC_ID now takes a flag,
KGSL_MEMFLAGS_USE_CPU_MAP. When set, the GPU
mapping will be set up to match the CPU mapping
during mmap().  This feature is only supported when
using per-process pagetables with the IOMMU. The
flags field of KGSL_GPUMEM_ALLOC_ID is copied back
to userspace and KGSL_MEMFLAGS_USE_CPU_MAP will
be cleared when this feature is not supported.
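
For example, userspace could use the flag roughly as
follows (a sketch: fd is an open kgsl device, 4K pages
assumed, error handling omitted):

	struct kgsl_gpumem_alloc_id param = { 0 };
	void *ptr;

	param.size = len;
	param.flags = KGSL_MEMFLAGS_USE_CPU_MAP;
	ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, &param);

	if (param.flags & KGSL_MEMFLAGS_USE_CPU_MAP) {
		/* gpuaddr stays 0 until mmap() chooses the address */
		ptr = mmap(NULL, param.mmapsize, PROT_READ | PROT_WRITE,
			   MAP_SHARED, fd, (off_t)param.id << 12);
		/* the GPU address now equals (unsigned long)ptr */
	}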

The IOMMU virtual address space has been adjusted
when per-process pagetables are enabled so that the
entire userspace address range (0 to TASK_SIZE) can
have equivalent mappings on the IOMMU. For buffers
that do not have equivalent mappings, the address
range from PAGE_OFFSET to KGSL_IOMMU_GLOBAL_MEM_BASE
is used.
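
With per-process pagetables enabled, the GPU virtual
address space is laid out roughly as follows (a sketch
assuming the usual 3G/1G split, i.e. PAGE_OFFSET =
0xc0000000):

	0x00000000 - TASK_SIZE    mirrored CPU mappings
	0xc0000000 - 0xf7f00000   GPU-only allocations
	0xf7f00000 - 0xf8000000   1MB gap for overrun detection
	0xf8000000 - 0xf8400000   global mappings (4MB)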

Change-Id: Ib61c03aa7453c3dd901c41e8fd297f66d402ae1a
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 637ae3c..9f2df27 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -28,6 +28,7 @@
 #include <linux/msm_ion.h>
 #include <linux/io.h>
 #include <mach/socinfo.h>
+#include <linux/mman.h>
 
 #include "kgsl.h"
 #include "kgsl_debugfs.h"
@@ -855,6 +856,58 @@
 }
 
 /**
+ * kgsl_sharedmem_region_empty - Check if an address region is empty
+ *
+ * @private: private data for the process to check.
+ * @gpuaddr: start address of the region
+ * @size: length of the region.
+ *
+ * Checks that there are no existing allocations within an address
+ * region. Note that unlike other kgsl_sharedmem* search functions,
+ * this one manages locking on its own.
+ *
+ * Returns 1 if the region is empty, 0 otherwise.
+ */
+int
+kgsl_sharedmem_region_empty(struct kgsl_process_private *private,
+	unsigned int gpuaddr, size_t size)
+{
+	int result = 1;
+	unsigned int gpuaddr_end = gpuaddr + size;
+
+	struct rb_node *node;
+
+	if (!kgsl_mmu_gpuaddr_in_range(gpuaddr))
+		return 0;
+
+	/* don't overflow */
+	if (gpuaddr_end < gpuaddr)
+		return 0;
+
+	spin_lock(&private->mem_lock);
+	node = private->mem_rb.rb_node;
+	while (node != NULL) {
+		struct kgsl_mem_entry *entry;
+		unsigned int memdesc_start, memdesc_end;
+
+		entry = rb_entry(node, struct kgsl_mem_entry, node);
+
+		memdesc_start = entry->memdesc.gpuaddr;
+		memdesc_end = memdesc_start
+				+ kgsl_memdesc_mmapsize(&entry->memdesc);
+
+		if (gpuaddr_end <= memdesc_start)
+			node = node->rb_left;
+		else if (memdesc_end <= gpuaddr)
+			node = node->rb_right;
+		else {
+			result = 0;
+			break;
+		}
+	}
+	spin_unlock(&private->mem_lock);
+	return result;
+}
+
+/**
  * kgsl_sharedmem_find_id - find a memory entry by id
  * @process: the owning process
  * @id: id to find
@@ -1422,6 +1475,8 @@
 	entry->memdesc.size = size;
 	entry->memdesc.physaddr = phys + offset;
 	entry->memdesc.hostptr = (void *) (virt + offset);
+	/* USE_CPU_MAP is not implemented for PMEM. */
+	entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	ret = memdesc_sg_phys(&entry->memdesc, phys + offset, size);
 	if (ret)
@@ -1538,6 +1593,8 @@
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = size;
 	entry->memdesc.useraddr = useraddr + (offset & PAGE_MASK);
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
+		entry->memdesc.gpuaddr = entry->memdesc.useraddr;
 
 	return memdesc_sg_virt(&entry->memdesc, entry->memdesc.useraddr,
 				size);
@@ -1590,6 +1647,8 @@
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = ALIGN(size, PAGE_SIZE);
 	entry->memdesc.useraddr = useraddr;
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
+		entry->memdesc.gpuaddr = entry->memdesc.useraddr;
 
 	ret = memdesc_sg_virt(&entry->memdesc, useraddr, size);
 	if (ret)
@@ -1630,6 +1689,8 @@
 	entry->priv_data = handle;
 	entry->memdesc.pagetable = pagetable;
 	entry->memdesc.size = 0;
+	/* USE_CPU_MAP is not implemented for ION. */
+	entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	sg_table = ion_sg_table(kgsl_ion_client, handle);
 
@@ -1653,6 +1714,13 @@
 	return -ENOMEM;
 }
 
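+/*
+ * Equivalent CPU and GPU mappings require per-process pagetables
+ * on the IOMMU, since each process then has the whole userspace
+ * address range available on the GPU side.
+ */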
+static inline int
+can_use_cpu_map(void)
+{
+	return (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU
+		&& kgsl_mmu_is_perprocess());
+}
+
 static long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv,
 				     unsigned int cmd, void *data)
 {
@@ -1680,9 +1748,12 @@
 	 */
 	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
 			| KGSL_MEMTYPE_MASK
-			| KGSL_MEMALIGN_MASK;
+			| KGSL_MEMALIGN_MASK
+			| KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	entry->memdesc.flags = param->flags;
+	if (!can_use_cpu_map())
+		entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	switch (memtype) {
 	case KGSL_USER_MEM_TYPE_PMEM:
@@ -1861,7 +1932,7 @@
 	return _kgsl_gpumem_sync_cache(entry, param->op);
 }
 
-/* Legacy cache function, does a flush (clean  + inv) */
+/* Legacy cache function, does a flush (clean + invalidate) */
 
 static long
 kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
@@ -1903,7 +1974,8 @@
 	flags &= KGSL_MEMFLAGS_GPUREADONLY
 		| KGSL_CACHEMODE_MASK
 		| KGSL_MEMTYPE_MASK
-		| KGSL_MEMALIGN_MASK;
+		| KGSL_MEMALIGN_MASK
+		| KGSL_MEMFLAGS_USE_CPU_MAP;
 
 	entry = kgsl_mem_entry_create();
 	if (entry == NULL)
@@ -1934,6 +2006,7 @@
 	struct kgsl_mem_entry *entry = NULL;
 	int result;
 
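+	/* USE_CPU_MAP is only supported via KGSL_GPUMEM_ALLOC_ID */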
+	param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
 	result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
 	if (result)
 		return result;
@@ -1969,14 +2042,19 @@
 	struct kgsl_mem_entry *entry = NULL;
 	int result;
 
+	if (!can_use_cpu_map())
+		param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
+
 	result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
 	if (result != 0)
 		goto err;
 
-	result = kgsl_mmu_map(private->pagetable, &entry->memdesc,
-			kgsl_memdesc_protflags(&entry->memdesc));
-	if (result)
-		goto err;
+	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+		result = kgsl_mmu_map(private->pagetable, &entry->memdesc,
+				kgsl_memdesc_protflags(&entry->memdesc));
+		if (result)
+			goto err;
+	}
 
 	result = kgsl_mem_entry_attach_process(entry, private);
 	if (result != 0)
@@ -1988,7 +2066,7 @@
 	param->id = entry->id;
 	param->flags = entry->memdesc.flags;
 	param->size = entry->memdesc.size;
-	param->mmapsize = entry->memdesc.size;
+	param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
 	param->gpuaddr = entry->memdesc.gpuaddr;
 	return result;
 err:
@@ -2031,7 +2109,7 @@
 	param->id = entry->id;
 	param->flags = entry->memdesc.flags;
 	param->size = entry->memdesc.size;
-	param->mmapsize = entry->memdesc.size;
+	param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
 	param->useraddr = entry->memdesc.useraddr;
 	return result;
 }
@@ -2429,6 +2507,142 @@
 	.close = kgsl_gpumem_vm_close,
 };
 
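+/*
+ * get_mmap_entry - look up and validate the mem entry backing an mmap
+ *
+ * Finds the entry by id (passed as the mmap pgoff), falling back to a
+ * legacy lookup by gpuaddr, and takes a reference on it. The entry
+ * must support vm operations, must not already be mapped, and len
+ * must match its mmap size.
+ */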
+static int
+get_mmap_entry(struct kgsl_process_private *private,
+		struct kgsl_mem_entry **out_entry, unsigned long pgoff,
+		unsigned long len)
+{
+	int ret = -EINVAL;
+	struct kgsl_mem_entry *entry;
+
+	entry = kgsl_sharedmem_find_id(private, pgoff);
+	if (entry == NULL) {
+		spin_lock(&private->mem_lock);
+		entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT);
+		spin_unlock(&private->mem_lock);
+	}
+
+	if (!entry)
+		return -EINVAL;
+
+	kgsl_mem_entry_get(entry);
+
+	if (!entry->memdesc.ops ||
+		!entry->memdesc.ops->vmflags ||
+		!entry->memdesc.ops->vmfault) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	if (entry->memdesc.useraddr != 0) {
+		ret = -EBUSY;
+		goto err_put;
+	}
+
+	if (len != kgsl_memdesc_mmapsize(&entry->memdesc)) {
+		ret = -ERANGE;
+		goto err_put;
+	}
+
+	*out_entry = entry;
+	return 0;
+err_put:
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+static unsigned long
+kgsl_get_unmapped_area(struct file *file, unsigned long addr,
+			unsigned long len, unsigned long pgoff,
+			unsigned long flags)
+{
+	unsigned long ret = 0;
+	unsigned long vma_offset = pgoff << PAGE_SHIFT;
+	struct kgsl_device_private *dev_priv = file->private_data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_mem_entry *entry = NULL;
+	unsigned int align;
+	unsigned int retry = 0;
+
+	if (vma_offset == device->memstore.gpuaddr)
+		return get_unmapped_area(NULL, addr, len, pgoff, flags);
+
+	ret = get_mmap_entry(private, &entry, pgoff, len);
+	if (ret)
+		return ret;
+
+	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc) || (flags & MAP_FIXED)) {
+		/*
+		 * If we're not going to use the same mapping on the gpu,
+		 * any address is fine.
+		 * For MAP_FIXED, hopefully the caller knows what they're doing,
+		 * but we may fail in mmap() if there is already something
+		 * at the virtual address chosen.
+		 */
+		ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
+		goto put;
+	}
+	if (entry->memdesc.gpuaddr != 0) {
+		KGSL_MEM_INFO(device,
+				"pgoff %lx already mapped to gpuaddr %x\n",
+				pgoff, entry->memdesc.gpuaddr);
+		ret = -EBUSY;
+		goto put;
+	}
+
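+	/*
+	 * mmap() only guarantees page alignment, so if a larger
+	 * alignment was requested, pad the search length and align
+	 * the result by hand, capping at 1MB.
+	 */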
+	align = kgsl_memdesc_get_align(&entry->memdesc);
+	if (align >= ilog2(SZ_1M))
+		align = ilog2(SZ_1M);
+	else if (align >= ilog2(SZ_64K))
+		align = ilog2(SZ_64K);
+	else if (align <= PAGE_SHIFT)
+		align = 0;
+
+	if (align)
+		len += 1 << align;
+	do {
+		ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
+		if (IS_ERR_VALUE(ret))
+			break;
+		if (align)
+			ret = ALIGN(ret, (1 << align));
+
+		/* make sure there isn't a GPU-only mapping at this address */
+		if (kgsl_sharedmem_region_empty(private, ret, len))
+			break;
+
+		trace_kgsl_mem_unmapped_area_collision(entry, addr, len, ret);
+
+		/*
+		 * If we collided, bump the hint address so that
+		 * get_unmapped_area knows to look somewhere else.
+		 */
+		addr = (addr == 0) ? ret + len : addr + len;
+
+		/*
+		 * The addr hint can be set by userspace to be near
+		 * the end of the address space. Make sure we search
+		 * the whole address space at least once by wrapping
+		 * back around once.
+		 */
+		if (!retry && (addr + len >= TASK_SIZE)) {
+			addr = 0;
+			retry = 1;
+		} else {
+			ret = -EBUSY;
+		}
+	} while (addr + len < TASK_SIZE);
+
+	if (IS_ERR_VALUE(ret))
+		KGSL_MEM_INFO(device,
+				"pid %d pgoff %lx len %ld failed error %ld\n",
+				private->pid, pgoff, len, ret);
+put:
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
 static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
 {
 	unsigned int ret, cache;
@@ -2443,35 +2657,21 @@
 	if (vma_offset == device->memstore.gpuaddr)
 		return kgsl_mmap_memstore(device, vma);
 
-	/* Find a chunk of GPU memory */
-	entry = kgsl_sharedmem_find_id(private, vma->vm_pgoff);
+	ret = get_mmap_entry(private, &entry, vma->vm_pgoff,
+				vma->vm_end - vma->vm_start);
+	if (ret)
+		return ret;
 
-	if (entry == NULL) {
-		spin_lock(&private->mem_lock);
-		entry = kgsl_sharedmem_find(private, vma_offset);
-		spin_unlock(&private->mem_lock);
-	}
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
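+		/*
+		 * Mirror the CPU mapping: adopt the address chosen by
+		 * kgsl_get_unmapped_area() as the GPU address.
+		 */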
+		entry->memdesc.gpuaddr = vma->vm_start;
 
-	if (entry)
-		kgsl_mem_entry_get(entry);
-	else
-		return -EINVAL;
-
-	if (!entry->memdesc.ops ||
-		!entry->memdesc.ops->vmflags ||
-		!entry->memdesc.ops->vmfault) {
-		ret = -EINVAL;
-		goto err_put;
-	}
-
-	if (entry->memdesc.useraddr != 0) {
-		ret = -EBUSY;
-		goto err_put;
-	}
-
-	if (entry->memdesc.size != (vma->vm_end - vma->vm_start)) {
-		ret = -ERANGE;
-		goto err_put;
+		ret = kgsl_mmu_map(private->pagetable, &entry->memdesc,
+				   kgsl_memdesc_protflags(&entry->memdesc));
+		if (ret) {
+			kgsl_mem_entry_put(entry);
+			return ret;
+		}
+		kgsl_mem_entry_track_gpuaddr(private, entry);
 	}
 
 	vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc);
@@ -2503,10 +2703,9 @@
 
 	entry->memdesc.useraddr = vma->vm_start;
 
+	trace_kgsl_mem_mmap(entry);
+
 	return 0;
-err_put:
-	kgsl_mem_entry_put(entry);
-	return ret;
 }
 
 static irqreturn_t kgsl_irq_handler(int irq, void *data)
@@ -2522,6 +2721,7 @@
 	.release = kgsl_release,
 	.open = kgsl_open,
 	.mmap = kgsl_mmap,
+	.get_unmapped_area = kgsl_get_unmapped_area,
 	.unlocked_ioctl = kgsl_ioctl,
 };
 
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index faf5d14..1a378c4 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -226,6 +226,10 @@
 static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
 				unsigned int gpuaddr, unsigned int size)
 {
+	/* don't overflow */
+	if ((gpuaddr + size) < gpuaddr)
+		return 0;
+
 	if (gpuaddr >= memdesc->gpuaddr &&
 	    ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) {
 		return 1;
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index ab3c478..a4bbee2 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -209,36 +209,59 @@
 	return table[kgsl_memdesc_get_cachemode(m)];
 }
 
+static void print_mem_entry(struct seq_file *s, struct kgsl_mem_entry *entry)
+{
+	char flags[6];
+	char usage[16];
+	struct kgsl_memdesc *m = &entry->memdesc;
+
+	flags[0] = kgsl_memdesc_is_global(m) ?  'g' : '-';
+	flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
+	flags[2] = get_alignflag(m);
+	flags[3] = get_cacheflag(m);
+	flags[4] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-';
+	flags[5] = '\0';
+
+	kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
+
+	seq_printf(s, "%08x %8d %5d %5s %10s %16s %5d\n",
+			m->gpuaddr, m->size, entry->id, flags,
+			memtype_str(entry->memtype), usage, m->sglen);
+}
+
 static int process_mem_print(struct seq_file *s, void *unused)
 {
 	struct kgsl_mem_entry *entry;
 	struct rb_node *node;
 	struct kgsl_process_private *private = s->private;
-	char flags[5];
-	char usage[16];
+	int next = 0;
 
-	spin_lock(&private->mem_lock);
 	seq_printf(s, "%8s %8s %5s %5s %10s %16s %5s\n",
 		   "gpuaddr", "size", "id", "flags", "type", "usage", "sglen");
+
+	/* print all entries with a GPU address */
+	spin_lock(&private->mem_lock);
+
 	for (node = rb_first(&private->mem_rb); node; node = rb_next(node)) {
-		struct kgsl_memdesc *m;
-
 		entry = rb_entry(node, struct kgsl_mem_entry, node);
-		m = &entry->memdesc;
-
-		flags[0] = kgsl_memdesc_is_global(m) ?  'g' : '-';
-		flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
-		flags[2] = get_alignflag(m);
-		flags[3] = get_cacheflag(m);
-		flags[4] = '\0';
-
-		kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
-
-		seq_printf(s, "%08x %8d %5d %5s %10s %16s %5d\n",
-			   m->gpuaddr, m->size, entry->id, flags,
-			   memtype_str(entry->memtype), usage, m->sglen);
+		print_mem_entry(s, entry);
 	}
+
 	spin_unlock(&private->mem_lock);
+
+	/* now print all the unbound entries */
+	while (1) {
+		rcu_read_lock();
+		entry = idr_get_next(&private->mem_idr, &next);
+		rcu_read_unlock();
+
+		if (entry == NULL)
+			break;
+		if (entry->memdesc.gpuaddr == 0)
+			print_mem_entry(s, entry);
+		next++;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index 25edc16..f9e96b9 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -27,9 +27,6 @@
 #include "kgsl_sharedmem.h"
 #include "adreno.h"
 
-#define KGSL_MMU_ALIGN_SHIFT    13
-#define KGSL_MMU_ALIGN_MASK     (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
-
 static enum kgsl_mmutype kgsl_mmu_type;
 
 static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable);
@@ -477,10 +474,10 @@
 	if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) &&
 		((KGSL_MMU_GLOBAL_PT == name) ||
 		(KGSL_MMU_PRIV_BANK_TABLE_NAME == name))) {
-		pagetable->kgsl_pool = gen_pool_create(PAGE_SHIFT, -1);
+		pagetable->kgsl_pool = gen_pool_create(ilog2(SZ_8K), -1);
 		if (pagetable->kgsl_pool == NULL) {
 			KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
-					KGSL_MMU_ALIGN_SHIFT);
+					ilog2(SZ_8K));
 			goto err_alloc;
 		}
 		if (gen_pool_add(pagetable->kgsl_pool,
@@ -491,10 +488,10 @@
 		}
 	}
 
-	pagetable->pool = gen_pool_create(KGSL_MMU_ALIGN_SHIFT, -1);
+	pagetable->pool = gen_pool_create(PAGE_SHIFT, -1);
 	if (pagetable->pool == NULL) {
 		KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
-			      KGSL_MMU_ALIGN_SHIFT);
+			      PAGE_SHIFT);
 		goto err_kgsl_pool;
 	}
 
@@ -615,7 +612,7 @@
 				unsigned int protflags)
 {
 	int ret;
-	struct gen_pool *pool;
+	struct gen_pool *pool = NULL;
 	int size;
 	int page_align = ilog2(PAGE_SIZE);
 
@@ -665,6 +662,10 @@
 				  pagetable->name);
 				return -EINVAL;
 			}
+		} else if (kgsl_memdesc_use_cpu_map(memdesc)) {
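+			/*
+			 * The GPU address was set at mmap() time to
+			 * mirror the CPU mapping, so nothing needs to
+			 * be allocated from a pool.
+			 */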
+			if (memdesc->gpuaddr == 0)
+				return -EINVAL;
+			pool = NULL;
 		}
 	}
 	if (pool) {
@@ -756,9 +757,11 @@
 
 	pool = pagetable->pool;
 
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()
-		&& kgsl_memdesc_is_global(memdesc)) {
-		pool = pagetable->kgsl_pool;
+	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+		if (kgsl_memdesc_is_global(memdesc))
+			pool = pagetable->kgsl_pool;
+		else if (kgsl_memdesc_use_cpu_map(memdesc))
+			pool = NULL;
 	}
 	if (pool)
 		gen_pool_free(pool, memdesc->gpuaddr, size);
@@ -895,8 +898,13 @@
 {
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
 		return 1;
-	return ((gpuaddr >= KGSL_PAGETABLE_BASE) &&
-		(gpuaddr < (KGSL_PAGETABLE_BASE + kgsl_mmu_get_ptsize())));
+	if (gpuaddr >= kgsl_mmu_get_base_addr() &&
+		gpuaddr < kgsl_mmu_get_base_addr() + kgsl_mmu_get_ptsize())
+		return 1;
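+	/* with per-process pagetables, any userspace address can be mapped */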
+	if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU
+		&& kgsl_mmu_is_perprocess())
+		return (gpuaddr > 0 && gpuaddr < TASK_SIZE);
+	return 0;
 }
 EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range);
 
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index bf330ee..9d1bffa 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -19,11 +19,10 @@
  * These defines control the address range for allocations that
  * are mapped into all pagetables.
  */
-#define KGSL_IOMMU_GLOBAL_MEM_BASE	0xC0000000
+#define KGSL_IOMMU_GLOBAL_MEM_BASE	0xf8000000
 #define KGSL_IOMMU_GLOBAL_MEM_SIZE	SZ_4M
 
-#define KGSL_MMU_ALIGN_SHIFT    13
-#define KGSL_MMU_ALIGN_MASK     (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
+#define KGSL_MMU_ALIGN_MASK     (~((1 << PAGE_SHIFT) - 1))
 
 /* Identifier for the global page table */
 /* Per process page tables will probably pass in the thread group
@@ -381,32 +380,52 @@
 /*
  * kgsl_mmu_base_addr() - Get gpu virtual address base.
  *
- * Returns the start address of the gpu
- * virtual address space.
+ * Returns the start address of the allocatable gpu
+ * virtual address space. Other mappings that mirror
+ * the CPU address space are possible outside this range.
  */
 static inline unsigned int kgsl_mmu_get_base_addr(void)
 {
-	return KGSL_PAGETABLE_BASE;
+	if (KGSL_MMU_TYPE_GPU == kgsl_mmu_get_mmutype()
+		|| !kgsl_mmu_is_perprocess())
+		return KGSL_PAGETABLE_BASE;
+	/*
+	 * This is the start of the kernel address
+	 * space, so allocations from this range will
+	 * never conflict with userspace addresses.
+	 */
+	return PAGE_OFFSET;
 }
 
 /*
  * kgsl_mmu_get_ptsize() - Get gpu pagetable size
  *
- * Returns the usable size of the gpu address space.
+ * Returns the usable size of the gpu allocatable
+ * address space.
  */
 static inline unsigned int kgsl_mmu_get_ptsize(void)
 {
 	/*
-	 * For IOMMU, we could do up to 4G virtual range if we wanted to, but
-	 * it makes more sense to return a smaller range and leave the rest of
-	 * the virtual range for future improvements
+	 * For IOMMU per-process pagetables, the allocatable range
+	 * and the kernel global range must both be outside
+	 * the userspace address range. There is a 1MB gap
+	 * between these address ranges to make overrun
+	 * detection easier.
+	 * For the shared pagetable case, use 2GB because
+	 * mirroring the CPU address space is not possible
+	 * and we're better off with extra room.
 	 */
 	enum kgsl_mmutype mmu_type = kgsl_mmu_get_mmutype();
 
 	if (KGSL_MMU_TYPE_GPU == mmu_type)
 		return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
-	else if (KGSL_MMU_TYPE_IOMMU == mmu_type)
-		return SZ_2G;
+	else if (KGSL_MMU_TYPE_IOMMU == mmu_type) {
+		if (kgsl_mmu_is_perprocess())
+			return KGSL_IOMMU_GLOBAL_MEM_BASE
+				- kgsl_mmu_get_base_addr() - SZ_1M;
+		else
+			return SZ_2G;
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index d94002b..9ee4b3f 100755
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -558,7 +558,8 @@
 	page_size = (align >= ilog2(SZ_64K) && size >= SZ_64K)
 			? SZ_64K : PAGE_SIZE;
 	/* update align flags for what we actually use */
-	kgsl_memdesc_set_align(memdesc, ilog2(page_size));
+	if (page_size != PAGE_SIZE)
+		kgsl_memdesc_set_align(memdesc, ilog2(page_size));
 
 	/*
 	 * There needs to be enough room in the sg structure to be able to
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
index 2052c9a..e31b8f3 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.h
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -186,6 +186,18 @@
 }
 
 /*
+ * kgsl_memdesc_has_guard_page - is the last page a guard page?
+ * @memdesc - the memdesc
+ *
+ * Returns nonzero if there is a guard page, 0 otherwise
+ */
+static inline int
+kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc)
+{
+	return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0;
+}
+
+/*
  * kgsl_memdesc_protflags - get mmu protection flags
  * @memdesc - the memdesc
  * Returns a mask of GSL_PT_PAGE* values based on the
@@ -200,6 +212,35 @@
 	return protflags;
 }
 
+/*
+ * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU?
+ * @memdesc - the memdesc
+ */
+static inline int
+kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
+{
+	return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0;
+}
+
+/*
+ * kgsl_memdesc_mmapsize - get the size of the mmap region
+ * @memdesc - the memdesc
+ *
+ * The entire memdesc must be mapped. Additionally, if the
+ * CPU mapping is going to be mirrored, there must be room
+ * for the guard page to be mapped so that the address spaces
+ * match up.
+ */
+static inline unsigned int
+kgsl_memdesc_mmapsize(const struct kgsl_memdesc *memdesc)
+{
+	unsigned int size = memdesc->size;
+	if (kgsl_memdesc_use_cpu_map(memdesc) &&
+		kgsl_memdesc_has_guard_page(memdesc))
+		size += SZ_4K;
+	return size;
+}
+
 static inline int
 kgsl_allocate(struct kgsl_memdesc *memdesc,
 		struct kgsl_pagetable *pagetable, size_t size)
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index 1e42cbd..f095c6a 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -338,6 +338,68 @@
 	)
 );
 
+TRACE_EVENT(kgsl_mem_mmap,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry),
+
+	TP_ARGS(mem_entry),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, useraddr)
+		__field(unsigned int, gpuaddr)
+		__field(unsigned int, size)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+		__field(unsigned int, flags)
+	),
+
+	TP_fast_assign(
+		__entry->useraddr = mem_entry->memdesc.useraddr;
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+		__entry->flags = mem_entry->memdesc.flags;
+	),
+
+	TP_printk(
+		"useraddr=%lx gpuaddr=0x%08x size=%d usage=%s id=%d"
+		" flags=0x%08x",
+		__entry->useraddr, __entry->gpuaddr, __entry->size,
+		__entry->usage, __entry->id, __entry->flags
+	)
+);
+
+TRACE_EVENT(kgsl_mem_unmapped_area_collision,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry,
+		 unsigned long hint,
+		 unsigned long len,
+		 unsigned long addr),
+
+	TP_ARGS(mem_entry, hint, len, addr),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned long, hint)
+		__field(unsigned long, len)
+		__field(unsigned long, addr)
+	),
+
+	TP_fast_assign(
+		__entry->id = mem_entry->id;
+		__entry->hint  = hint;
+		__entry->len = len;
+		__entry->addr = addr;
+	),
+
+	TP_printk(
+		"id=%d hint=0x%lx len=%ld addr=0x%lx",
+		__entry->id, __entry->hint, __entry->len, __entry->addr
+	)
+);
+
 TRACE_EVENT(kgsl_mem_map,
 
 	TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd),
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index 7e28798..b7d393f 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -29,6 +29,7 @@
 
 /* General allocation hints */
 #define KGSL_MEMFLAGS_GPUREADONLY 0x01000000
+#define KGSL_MEMFLAGS_USE_CPU_MAP 0x10000000
 
 /* Memory caching hints */
 #define KGSL_CACHEMODE_MASK 0x0C000000
@@ -556,6 +557,8 @@
  * KGSL_MEMFLAGS_GPUREADONLY: the GPU will be unable to write to the buffer
  * KGSL_MEMTYPE*: usage hint for debugging aid
  * KGSL_MEMALIGN*: alignment hint, may be ignored or adjusted by the kernel.
+ * KGSL_MEMFLAGS_USE_CPU_MAP: If set on call and still set on return, the
+ * returned GPU address will be 0. Calling mmap() will set the GPU address.
  */
 struct kgsl_gpumem_alloc_id {
 	unsigned int id;