msm-3.4 (commit 35cca8ba3ee0e6a2085dbcac48fb2ccbaa72ba98) video/gpu/iommu .. and all the hacks that go with it
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index b114875..9d24d65 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -3737,10 +3737,6 @@
# define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE (1 << 12)
# define GEN6_RCCUNIT_CLOCK_GATE_DISABLE (1 << 11)
-#define GEN6_UCGCTL2 0x9404
-# define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE (1 << 12)
-# define GEN6_RCCUNIT_CLOCK_GATE_DISABLE (1 << 11)
-
#define GEN6_RPNSWREQ 0xA008
#define GEN6_TURBO_DISABLE (1<<31)
#define GEN6_FREQUENCY(x) ((x)<<25)
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 48dae40..f7eb5d8 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -439,9 +439,6 @@
struct radeon_i2c_chan *ddc_bus;
/* some systems have an hdmi and vga port with a shared ddc line */
bool shared_ddc;
- /* for some Radeon chip families we apply an additional EDID header
- check as part of the DDC probe */
- bool requires_extended_probe;
bool use_digital;
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
@@ -529,8 +526,7 @@
u8 val);
extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector);
extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector);
-extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector,
- bool requires_extended_probe);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
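
Note: the extended-EDID-probe decision now lives inside radeon_ddc_probe() itself instead of being threaded through every caller. A minimal sketch of an updated call site (hypothetical caller; the real ones are in radeon_connectors.c):

	/* no per-connector requires_extended_probe flag any more */
	if (radeon_ddc_probe(radeon_connector))
		status = connector_status_connected;
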
diff --git a/drivers/gpu/ion/Kconfig b/drivers/gpu/ion/Kconfig
index 5bb254b..39133b5 100644
--- a/drivers/gpu/ion/Kconfig
+++ b/drivers/gpu/ion/Kconfig
@@ -16,3 +16,12 @@
depends on ARCH_MSM && ION
help
Choose this option if you wish to use ion on an MSM target.
+
+config ION_LEAK_CHECK
+ bool "Check for leaked Ion buffers (debugging)"
+ depends on ION
+ help
+ Choose this option if you wish to enable checking for leaked
+ ion buffers at runtime. Choosing this option will also add a
+ debugfs node under the ion directory that can be used to
+ enable/disable the leak checking.
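
Note: once CONFIG_ION_LEAK_CHECK is set, the check itself is toggled at runtime through the debugfs node this help text describes. A minimal user-space sketch, assuming debugfs is mounted at /sys/kernel/debug:

	#include <fcntl.h>
	#include <unistd.h>

	/* Write "1" to enable the Ion leak checker, "0" to disable it. */
	static int set_ion_leak_check(const char *val)
	{
		int fd = open("/sys/kernel/debug/ion/check_leaks_on_destroy",
			      O_WRONLY);

		if (fd < 0)
			return -1;
		if (write(fd, val, 1) != 1) {
			close(fd);
			return -1;
		}
		return close(fd);
	}
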
diff --git a/drivers/gpu/ion/Makefile b/drivers/gpu/ion/Makefile
index 51349f6..60a6b81 100644
--- a/drivers/gpu/ion/Makefile
+++ b/drivers/gpu/ion/Makefile
@@ -1,4 +1,4 @@
obj-$(CONFIG_ION) += ion.o ion_heap.o ion_system_heap.o ion_carveout_heap.o ion_iommu_heap.o ion_cp_heap.o
-obj-$(CONFIG_CMA) += ion_cma_heap.o
+obj-$(CONFIG_CMA) += ion_cma_heap.o ion_cma_secure_heap.o
obj-$(CONFIG_ION_TEGRA) += tegra/
obj-$(CONFIG_ION_MSM) += msm/
diff --git a/drivers/gpu/ion/ion.c b/drivers/gpu/ion/ion.c
index d005605..d3434d8 100644
--- a/drivers/gpu/ion/ion.c
+++ b/drivers/gpu/ion/ion.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -34,10 +34,11 @@
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
#include <linux/msm_ion.h>
+#include <trace/events/kmem.h>
+
#include <mach/iommu_domains.h>
#include "ion_priv.h"
-#define DEBUG
/**
* struct ion_device - the metadata of the ion device node
@@ -105,6 +106,12 @@
unsigned int iommu_map_cnt;
};
+bool ion_buffer_fault_user_mappings(struct ion_buffer *buffer)
+{
+ return ((buffer->flags & ION_FLAG_CACHED) &&
+ !(buffer->flags & ION_FLAG_CACHED_NEEDS_SYNC));
+}
+
static void ion_iommu_release(struct kref *kref);
/* this function should only be called while dev->lock is held */
@@ -188,6 +195,8 @@
return NULL;
}
+static int ion_buffer_alloc_dirty(struct ion_buffer *buffer);
+
/* this function should only be called while dev->lock is held */
static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
struct ion_device *dev,
@@ -197,13 +206,15 @@
{
struct ion_buffer *buffer;
struct sg_table *table;
- int ret;
+ struct scatterlist *sg;
+ int i, ret;
buffer = kzalloc(sizeof(struct ion_buffer), GFP_KERNEL);
if (!buffer)
return ERR_PTR(-ENOMEM);
buffer->heap = heap;
+ buffer->flags = flags;
kref_init(&buffer->ref);
ret = heap->ops->allocate(heap, buffer, len, align, flags);
@@ -214,19 +225,54 @@
buffer->dev = dev;
buffer->size = len;
- buffer->flags = flags;
- table = buffer->heap->ops->map_dma(buffer->heap, buffer);
+ table = heap->ops->map_dma(heap, buffer);
if (IS_ERR_OR_NULL(table)) {
heap->ops->free(buffer);
kfree(buffer);
return ERR_PTR(PTR_ERR(table));
}
buffer->sg_table = table;
+ if (ion_buffer_fault_user_mappings(buffer)) {
+ for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents,
+ i) {
+ if (sg_dma_len(sg) == PAGE_SIZE)
+ continue;
+ pr_err("%s: cached mappings that will be faulted in "
+ "must have pagewise sg_lists\n", __func__);
+ ret = -EINVAL;
+ goto err;
+ }
+ ret = ion_buffer_alloc_dirty(buffer);
+ if (ret)
+ goto err;
+ }
+
+ buffer->dev = dev;
+ buffer->size = len;
+ INIT_LIST_HEAD(&buffer->vmas);
mutex_init(&buffer->lock);
+ /* this will set up dma addresses for the sglist -- it is not
+ technically correct as per the dma api -- a specific
+ device isn't really taking ownership here. However, in practice on
+ our systems the only dma_address space is physical addresses.
+ Additionally, we can't afford the overhead of invalidating every
+ allocation via dma_map_sg. The implicit contract here is that
+   memory coming from the heaps is ready for dma, i.e. if it has a
+ cached mapping that mapping has been invalidated */
+ for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
+ if (sg_dma_address(sg) == 0)
+ sg_dma_address(sg) = sg_phys(sg);
+ }
ion_buffer_add(dev, buffer);
return buffer;
+
+err:
+ heap->ops->unmap_dma(heap, buffer);
+ heap->ops->free(buffer);
+ kfree(buffer);
+ return ERR_PTR(ret);
}
/**
@@ -261,6 +307,12 @@
mutex_unlock(&buffer->lock);
}
+static void ion_delayed_unsecure(struct ion_buffer *buffer)
+{
+ if (buffer->heap->ops->unsecure_buffer)
+ buffer->heap->ops->unsecure_buffer(buffer, 1);
+}
+
static void ion_buffer_destroy(struct kref *kref)
{
struct ion_buffer *buffer = container_of(kref, struct ion_buffer, ref);
@@ -268,14 +320,16 @@
if (WARN_ON(buffer->kmap_cnt > 0))
buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
-
buffer->heap->ops->unmap_dma(buffer->heap, buffer);
+ ion_delayed_unsecure(buffer);
ion_iommu_delayed_unmap(buffer);
buffer->heap->ops->free(buffer);
mutex_lock(&dev->lock);
rb_erase(&buffer->node, &dev->buffers);
mutex_unlock(&dev->lock);
+ if (buffer->flags & ION_FLAG_CACHED)
+ kfree(buffer->dirty);
kfree(buffer);
}
@@ -402,7 +456,7 @@
struct ion_handle *handle;
struct ion_device *dev = client->dev;
struct ion_buffer *buffer = NULL;
- unsigned long secure_allocation = flags & ION_SECURE;
+ unsigned long secure_allocation = flags & ION_FLAG_SECURE;
const unsigned int MAX_DBG_STR_LEN = 64;
char dbg_str[MAX_DBG_STR_LEN];
unsigned int dbg_str_idx = 0;
@@ -410,6 +464,16 @@
dbg_str[0] = '\0';
/*
+ * For now, we don't want to fault in pages individually since
+ * clients are already doing manual cache maintenance. In
+ * other words, the implicit caching infrastructure is in
+ * place (in code) but should not be used.
+ */
+ flags |= ION_FLAG_CACHED_NEEDS_SYNC;
+
+ pr_debug("%s: len %d align %d heap_mask %u flags %x\n", __func__, len,
+ align, heap_mask, flags);
+ /*
* traverse the list of heaps available in this system in priority
* order. If the heap type is supported by the client, and matches the
* request of the caller allocate from it. Repeat until allocate has
@@ -431,11 +495,18 @@
continue;
/* Do not allow un-secure heap if secure is specified */
if (secure_allocation &&
- (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP))
+ !ion_heap_allow_secure_allocation(heap->type))
continue;
+ trace_ion_alloc_buffer_start(client->name, heap->name, len,
+ heap_mask, flags);
buffer = ion_buffer_create(heap, dev, len, align, flags);
+ trace_ion_alloc_buffer_end(client->name, heap->name, len,
+ heap_mask, flags);
if (!IS_ERR_OR_NULL(buffer))
break;
+
+ trace_ion_alloc_buffer_fallback(client->name, heap->name, len,
+ heap_mask, flags, PTR_ERR(buffer));
if (dbg_str_idx < MAX_DBG_STR_LEN) {
unsigned int len_left = MAX_DBG_STR_LEN-dbg_str_idx-1;
int ret_value = snprintf(&dbg_str[dbg_str_idx],
@@ -454,10 +525,15 @@
}
mutex_unlock(&dev->lock);
- if (buffer == NULL)
+ if (buffer == NULL) {
+ trace_ion_alloc_buffer_fail(client->name, dbg_str, len,
+ heap_mask, flags, -ENODEV);
return ERR_PTR(-ENODEV);
+ }
if (IS_ERR(buffer)) {
+ trace_ion_alloc_buffer_fail(client->name, dbg_str, len,
+ heap_mask, flags, PTR_ERR(buffer));
pr_debug("ION is unable to allocate 0x%x bytes (alignment: "
"0x%x) from heap(s) %sfor client %s with heap "
"mask 0x%x\n",
@@ -627,6 +703,19 @@
struct ion_iommu_map *iommu_map;
int ret = 0;
+ if (IS_ERR_OR_NULL(client)) {
+ pr_err("%s: client pointer is invalid\n", __func__);
+ return -EINVAL;
+ }
+ if (IS_ERR_OR_NULL(handle)) {
+ pr_err("%s: handle pointer is invalid\n", __func__);
+ return -EINVAL;
+ }
+ if (IS_ERR_OR_NULL(handle->buffer)) {
+ pr_err("%s: buffer pointer is invalid\n", __func__);
+ return -EINVAL;
+ }
+
if (ION_IS_CACHED(flags)) {
pr_err("%s: Cannot map iommu as cached.\n", __func__);
return -EINVAL;
@@ -687,6 +776,8 @@
if (iommu_map->flags & ION_IOMMU_UNMAP_DELAYED)
kref_get(&iommu_map->ref);
+ } else {
+ ret = PTR_ERR(iommu_map);
}
} else {
if (iommu_map->flags != iommu_flags) {
@@ -732,6 +823,19 @@
struct ion_iommu_map *iommu_map;
struct ion_buffer *buffer;
+ if (IS_ERR_OR_NULL(client)) {
+ pr_err("%s: client pointer is invalid\n", __func__);
+ return;
+ }
+ if (IS_ERR_OR_NULL(handle)) {
+ pr_err("%s: handle pointer is invalid\n", __func__);
+ return;
+ }
+ if (IS_ERR_OR_NULL(handle->buffer)) {
+ pr_err("%s: buffer pointer is invalid\n", __func__);
+ return;
+ }
+
mutex_lock(&client->lock);
buffer = handle->buffer;
@@ -865,7 +969,7 @@
if (type == ION_HEAP_TYPE_SYSTEM_CONTIG ||
type == ION_HEAP_TYPE_CARVEOUT ||
type == (enum ion_heap_type) ION_HEAP_TYPE_CP)
- seq_printf(s, " : %12lx", handle->buffer->priv_phys);
+ seq_printf(s, " : %12pa", &handle->buffer->priv_phys);
else
seq_printf(s, " : %12s", "N/A");
@@ -975,10 +1079,91 @@
return client;
}
+/**
+ * ion_mark_dangling_buffers_locked() - Mark dangling buffers
+ * @dev: the ion device whose buffers will be searched
+ *
+ * Sets marked=1 for all known buffers associated with `dev' that no
+ * longer have a handle pointing to them. dev->lock should be held
+ * across a call to this function (and should only be unlocked after
+ * checking for marked buffers).
+ */
+static void ion_mark_dangling_buffers_locked(struct ion_device *dev)
+{
+ struct rb_node *n, *n2;
+ /* mark all buffers as 1 */
+ for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
+ struct ion_buffer *buf = rb_entry(n, struct ion_buffer,
+ node);
+
+ buf->marked = 1;
+ }
+
+ /* now see which buffers we can access */
+ for (n = rb_first(&dev->clients); n; n = rb_next(n)) {
+ struct ion_client *client = rb_entry(n, struct ion_client,
+ node);
+
+ mutex_lock(&client->lock);
+ for (n2 = rb_first(&client->handles); n2; n2 = rb_next(n2)) {
+ struct ion_handle *handle
+ = rb_entry(n2, struct ion_handle, node);
+
+ handle->buffer->marked = 0;
+
+ }
+ mutex_unlock(&client->lock);
+
+ }
+}
+
+#ifdef CONFIG_ION_LEAK_CHECK
+static u32 ion_debug_check_leaks_on_destroy;
+
+static int ion_check_for_and_print_leaks(struct ion_device *dev)
+{
+ struct rb_node *n;
+ int num_leaks = 0;
+
+ if (!ion_debug_check_leaks_on_destroy)
+ return 0;
+
+ /* check for leaked buffers (those that no longer have a
+ * handle pointing to them) */
+ ion_mark_dangling_buffers_locked(dev);
+
+ /* Anyone still marked as a 1 means a leaked handle somewhere */
+ for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
+ struct ion_buffer *buf = rb_entry(n, struct ion_buffer,
+ node);
+
+ if (buf->marked == 1) {
+ pr_info("Leaked ion buffer at %p\n", buf);
+ num_leaks++;
+ }
+ }
+ return num_leaks;
+}
+static void setup_ion_leak_check(struct dentry *debug_root)
+{
+ debugfs_create_bool("check_leaks_on_destroy", 0664, debug_root,
+ &ion_debug_check_leaks_on_destroy);
+}
+#else
+static int ion_check_for_and_print_leaks(struct ion_device *dev)
+{
+ return 0;
+}
+static void setup_ion_leak_check(struct dentry *debug_root)
+{
+}
+#endif
+
void ion_client_destroy(struct ion_client *client)
{
struct ion_device *dev = client->dev;
struct rb_node *n;
+ int num_leaks;
pr_debug("%s: %d\n", __func__, __LINE__);
while ((n = rb_first(&client->handles))) {
@@ -991,8 +1176,21 @@
put_task_struct(client->task);
rb_erase(&client->node, &dev->clients);
debugfs_remove_recursive(client->debug_root);
+
+ num_leaks = ion_check_for_and_print_leaks(dev);
+
mutex_unlock(&dev->lock);
+ if (num_leaks) {
+ struct task_struct *current_task = current;
+ char current_task_name[TASK_COMM_LEN];
+ get_task_comm(current_task_name, current_task);
+ WARN(1, "%s: Detected %d leaked ion buffer%s.\n",
+ __func__, num_leaks, num_leaks == 1 ? "" : "s");
+ pr_info("task name at time of leak: %s, pid: %d\n",
+ current_task_name, current_task->pid);
+ }
+
kfree(client->name);
kfree(client);
}
@@ -1062,12 +1260,47 @@
}
EXPORT_SYMBOL(ion_sg_table);
+struct sg_table *ion_create_chunked_sg_table(phys_addr_t buffer_base,
+ size_t chunk_size, size_t total_size)
+{
+ struct sg_table *table;
+ int i, n_chunks, ret;
+ struct scatterlist *sg;
+
+ table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!table)
+ return ERR_PTR(-ENOMEM);
+
+ n_chunks = DIV_ROUND_UP(total_size, chunk_size);
+ pr_debug("creating sg_table with %d chunks\n", n_chunks);
+
+ ret = sg_alloc_table(table, n_chunks, GFP_KERNEL);
+ if (ret)
+ goto err0;
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ dma_addr_t addr = buffer_base + i * chunk_size;
+ sg_dma_address(sg) = addr;
+ sg_dma_len(sg) = chunk_size;
+ }
+
+ return table;
+err0:
+ kfree(table);
+ return ERR_PTR(ret);
+}
+
+static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
+ struct device *dev,
+ enum dma_data_direction direction);
+
static struct sg_table *ion_map_dma_buf(struct dma_buf_attachment *attachment,
enum dma_data_direction direction)
{
struct dma_buf *dmabuf = attachment->dmabuf;
struct ion_buffer *buffer = dmabuf->priv;
+ ion_buffer_sync_for_device(buffer, attachment->dev, direction);
return buffer->sg_table;
}
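
Note: ion_create_chunked_sg_table() gives the contiguous heaps one shared way to describe a buffer as fixed-size chunks; the carveout and CP heaps below use it to emit page-sized entries for cached buffers (and, for CP, 1 MB entries for aligned secure ones). A usage sketch for a physically contiguous buffer at base:

	/* describe a 1 MB contiguous region as 256 page-sized chunks so
	 * per-page cache maintenance and fault-in can work on it */
	struct sg_table *table;

	table = ion_create_chunked_sg_table(base, PAGE_SIZE, SZ_1M);
	if (IS_ERR(table))
		return PTR_ERR(table);
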
@@ -1077,40 +1310,119 @@
{
}
-static void ion_vma_open(struct vm_area_struct *vma)
+static int ion_buffer_alloc_dirty(struct ion_buffer *buffer)
{
- struct ion_buffer *buffer = vma->vm_private_data;
+ unsigned long pages = buffer->sg_table->nents;
+ unsigned long length = (pages + BITS_PER_LONG - 1)/BITS_PER_LONG;
- pr_debug("%s: %d\n", __func__, __LINE__);
+ buffer->dirty = kzalloc(length * sizeof(unsigned long), GFP_KERNEL);
+ if (!buffer->dirty)
+ return -ENOMEM;
+ return 0;
+}
+
+struct ion_vma_list {
+ struct list_head list;
+ struct vm_area_struct *vma;
+};
+
+static void ion_buffer_sync_for_device(struct ion_buffer *buffer,
+ struct device *dev,
+ enum dma_data_direction dir)
+{
+ struct scatterlist *sg;
+ int i;
+ struct ion_vma_list *vma_list;
+
+ pr_debug("%s: syncing for device %s\n", __func__,
+ dev ? dev_name(dev) : "null");
+
+ if (!ion_buffer_fault_user_mappings(buffer))
+ return;
mutex_lock(&buffer->lock);
- buffer->umap_cnt++;
+ for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
+ if (!test_bit(i, buffer->dirty))
+ continue;
+ dma_sync_sg_for_device(dev, sg, 1, dir);
+ clear_bit(i, buffer->dirty);
+ }
+ list_for_each_entry(vma_list, &buffer->vmas, list) {
+ struct vm_area_struct *vma = vma_list->vma;
+
+ zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start,
+ NULL);
+ }
mutex_unlock(&buffer->lock);
}
-static void ion_vma_close(struct vm_area_struct *vma)
+int ion_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
struct ion_buffer *buffer = vma->vm_private_data;
-
- pr_debug("%s: %d\n", __func__, __LINE__);
+ struct scatterlist *sg;
+ int i;
mutex_lock(&buffer->lock);
- buffer->umap_cnt--;
+ set_bit(vmf->pgoff, buffer->dirty);
+
+ for_each_sg(buffer->sg_table->sgl, sg, buffer->sg_table->nents, i) {
+ if (i != vmf->pgoff)
+ continue;
+ dma_sync_sg_for_cpu(NULL, sg, 1, DMA_BIDIRECTIONAL);
+ vm_insert_page(vma, (unsigned long)vmf->virtual_address,
+ sg_page(sg));
+ break;
+ }
+ mutex_unlock(&buffer->lock);
+ return VM_FAULT_NOPAGE;
+}
+
+static void ion_vm_open(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+ struct ion_vma_list *vma_list;
+
+ vma_list = kmalloc(sizeof(struct ion_vma_list), GFP_KERNEL);
+ if (!vma_list)
+ return;
+ vma_list->vma = vma;
+ mutex_lock(&buffer->lock);
+ list_add(&vma_list->list, &buffer->vmas);
+ mutex_unlock(&buffer->lock);
+ pr_debug("%s: adding %p\n", __func__, vma);
+}
+
+static void ion_vm_close(struct vm_area_struct *vma)
+{
+ struct ion_buffer *buffer = vma->vm_private_data;
+ struct ion_vma_list *vma_list, *tmp;
+
+ pr_debug("%s\n", __func__);
+ mutex_lock(&buffer->lock);
+ list_for_each_entry_safe(vma_list, tmp, &buffer->vmas, list) {
+ if (vma_list->vma != vma)
+ continue;
+ list_del(&vma_list->list);
+ kfree(vma_list);
+ pr_debug("%s: deleting %p\n", __func__, vma);
+ break;
+ }
mutex_unlock(&buffer->lock);
if (buffer->heap->ops->unmap_user)
buffer->heap->ops->unmap_user(buffer->heap, buffer);
}
-static struct vm_operations_struct ion_vm_ops = {
- .open = ion_vma_open,
- .close = ion_vma_close,
+struct vm_operations_struct ion_vma_ops = {
+ .open = ion_vm_open,
+ .close = ion_vm_close,
+ .fault = ion_vm_fault,
};
static int ion_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
{
struct ion_buffer *buffer = dmabuf->priv;
- int ret;
+ int ret = 0;
if (!buffer->heap->ops->map_user) {
pr_err("%s: this heap does not define a method for mapping "
@@ -1118,25 +1430,26 @@
return -EINVAL;
}
+ if (ion_buffer_fault_user_mappings(buffer)) {
+ vma->vm_private_data = buffer;
+ vma->vm_ops = &ion_vma_ops;
+ vma->vm_flags |= VM_MIXEDMAP;
+ ion_vm_open(vma);
+ return 0;
+ }
+
+ if (!(buffer->flags & ION_FLAG_CACHED))
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
mutex_lock(&buffer->lock);
/* now map it to userspace */
ret = buffer->heap->ops->map_user(buffer->heap, buffer, vma);
+ mutex_unlock(&buffer->lock);
- if (ret) {
- mutex_unlock(&buffer->lock);
+ if (ret)
pr_err("%s: failure mapping buffer to userspace\n",
__func__);
- } else {
- buffer->umap_cnt++;
- mutex_unlock(&buffer->lock);
- vma->vm_ops = &ion_vm_ops;
- /*
- * move the buffer into the vm_private_data so we can access it
- * from vma_open/close
- */
- vma->vm_private_data = buffer;
- }
return ret;
}
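
Note: taken together, ion_vm_fault(), the per-page dirty bitmap, and ion_buffer_sync_for_device() implement lazy cache maintenance for cached buffers that opt into faulting. The flow, sketched as annotations (all names are from this patch):

	/*
	 * mmap()             -> ion_mmap() installs ion_vma_ops and
	 *                       VM_MIXEDMAP; no pages are mapped yet.
	 * CPU touches page N -> ion_vm_fault(): sync page N for the CPU,
	 *                       vm_insert_page() it, set bit N in
	 *                       buffer->dirty.
	 * DMA map            -> ion_buffer_sync_for_device(): flush only
	 *                       the dirty pages, clear their bits, then
	 *                       zap_page_range() every tracked VMA so the
	 *                       next CPU access faults and re-syncs.
	 */
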
@@ -1205,33 +1518,6 @@
.kunmap = ion_dma_buf_kunmap,
};
-static int ion_share_set_flags(struct ion_client *client,
- struct ion_handle *handle,
- unsigned long flags)
-{
- struct ion_buffer *buffer;
- bool valid_handle;
- unsigned long ion_flags = 0;
- if (flags & O_DSYNC)
- ion_flags = ION_SET_UNCACHED(ion_flags);
- else
- ion_flags = ION_SET_CACHED(ion_flags);
-
-
- mutex_lock(&client->lock);
- valid_handle = ion_handle_validate(client, handle);
- mutex_unlock(&client->lock);
- if (!valid_handle) {
- WARN(1, "%s: invalid handle passed to set_flags.\n", __func__);
- return -EINVAL;
- }
-
- buffer = handle->buffer;
-
- return 0;
-}
-
-
int ion_share_dma_buf(struct ion_client *client, struct ion_handle *handle)
{
struct ion_buffer *buffer;
@@ -1299,6 +1585,30 @@
}
EXPORT_SYMBOL(ion_import_dma_buf);
+static int ion_sync_for_device(struct ion_client *client, int fd)
+{
+ struct dma_buf *dmabuf;
+ struct ion_buffer *buffer;
+
+ dmabuf = dma_buf_get(fd);
+ if (IS_ERR_OR_NULL(dmabuf))
+ return PTR_ERR(dmabuf);
+
+ /* if this memory came from ion */
+ if (dmabuf->ops != &dma_buf_ops) {
+ pr_err("%s: can not sync dmabuf from another exporter\n",
+ __func__);
+ dma_buf_put(dmabuf);
+ return -EINVAL;
+ }
+ buffer = dmabuf->priv;
+
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_BIDIRECTIONAL);
+ dma_buf_put(dmabuf);
+ return 0;
+}
+
static long ion_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
struct ion_client *client = filp->private_data;
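
Note: ION_IOC_SYNC lets user space flush an Ion-exported dma-buf for device access without tracking dirty ranges itself; the kernel side lands in ion_sync_for_device() above. A minimal user-space sketch, assuming this tree's linux/ion.h definitions:

	#include <sys/ioctl.h>
	#include <linux/ion.h>

	/* flush CPU writes on an Ion dma-buf fd before handing it to a device */
	static int ion_sync_fd(int ion_fd, int dmabuf_fd)
	{
		struct ion_fd_data data = { .fd = dmabuf_fd };

		return ioctl(ion_fd, ION_IOC_SYNC, &data);
	}
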
@@ -1342,14 +1652,9 @@
case ION_IOC_SHARE:
{
struct ion_fd_data data;
- int ret;
if (copy_from_user(&data, (void __user *)arg, sizeof(data)))
return -EFAULT;
- ret = ion_share_set_flags(client, data.handle, filp->f_flags);
- if (ret)
- return ret;
-
data.fd = ion_share_dma_buf(client, data.handle);
if (copy_to_user((void __user *)arg, &data, sizeof(data)))
return -EFAULT;
@@ -1376,6 +1681,15 @@
return ret;
break;
}
+ case ION_IOC_SYNC:
+ {
+ struct ion_fd_data data;
+ if (copy_from_user(&data, (void __user *)arg,
+ sizeof(struct ion_fd_data)))
+ return -EFAULT;
+ ion_sync_for_device(client, data.fd);
+ break;
+ }
case ION_IOC_CUSTOM:
{
struct ion_device *dev = client->dev;
@@ -1397,9 +1711,6 @@
case ION_IOC_CLEAN_INV_CACHES:
return client->dev->custom_ioctl(client,
ION_IOC_CLEAN_INV_CACHES, arg);
- case ION_IOC_GET_FLAGS:
- return client->dev->custom_ioctl(client,
- ION_IOC_GET_FLAGS, arg);
default:
return -ENOTTY;
}
@@ -1542,6 +1853,10 @@
{
struct ion_device *dev = heap->dev;
struct rb_node *n;
+ size_t size;
+
+ if (!heap->ops->phys)
+ return;
for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
struct ion_buffer *buffer =
@@ -1554,9 +1869,11 @@
"Part of memory map will not be logged\n");
break;
}
- data->addr = buffer->priv_phys;
- data->addr_end = buffer->priv_phys + buffer->size-1;
- data->size = buffer->size;
+
+ buffer->heap->ops->phys(buffer->heap, buffer,
+ &(data->addr), &size);
+ data->size = (unsigned long) size;
+ data->addr_end = data->addr + data->size - 1;
data->client_name = ion_debug_locate_owner(dev, buffer);
ion_debug_mem_map_add(mem_map, data);
}
@@ -1674,6 +1991,73 @@
mutex_unlock(&dev->lock);
}
+int ion_secure_handle(struct ion_client *client, struct ion_handle *handle,
+ int version, void *data, int flags)
+{
+ int ret = -EINVAL;
+ struct ion_heap *heap;
+ struct ion_buffer *buffer;
+
+ mutex_lock(&client->lock);
+ if (!ion_handle_validate(client, handle)) {
+ WARN(1, "%s: invalid handle passed to secure.\n", __func__);
+ goto out_unlock;
+ }
+
+ buffer = handle->buffer;
+ heap = buffer->heap;
+
+ if (!ion_heap_allow_handle_secure(heap->type)) {
+ pr_err("%s: cannot secure buffer from non secure heap\n",
+ __func__);
+ goto out_unlock;
+ }
+
+ BUG_ON(!buffer->heap->ops->secure_buffer);
+ /*
+ * Protect the handle via the client lock to ensure we aren't
+ * racing with free
+ */
+ ret = buffer->heap->ops->secure_buffer(buffer, version, data, flags);
+
+out_unlock:
+ mutex_unlock(&client->lock);
+ return ret;
+}
+
+int ion_unsecure_handle(struct ion_client *client, struct ion_handle *handle)
+{
+ int ret = -EINVAL;
+ struct ion_heap *heap;
+ struct ion_buffer *buffer;
+
+ mutex_lock(&client->lock);
+ if (!ion_handle_validate(client, handle)) {
+ WARN(1, "%s: invalid handle passed to secure.\n", __func__);
+ goto out_unlock;
+ }
+
+ buffer = handle->buffer;
+ heap = buffer->heap;
+
+ if (!ion_heap_allow_handle_secure(heap->type)) {
+ pr_err("%s: cannot secure buffer from non secure heap\n",
+ __func__);
+ goto out_unlock;
+ }
+
+ BUG_ON(!buffer->heap->ops->unsecure_buffer);
+ /*
+ * Protect the handle via the client lock to ensure we aren't
+ * racing with free
+ */
+ ret = buffer->heap->ops->unsecure_buffer(buffer, 0);
+
+out_unlock:
+ mutex_unlock(&client->lock);
+ return ret;
+}
+
int ion_secure_heap(struct ion_device *dev, int heap_id, int version,
void *data)
{
@@ -1687,7 +2071,7 @@
mutex_lock(&dev->lock);
for (n = rb_first(&dev->heaps); n != NULL; n = rb_next(n)) {
struct ion_heap *heap = rb_entry(n, struct ion_heap, node);
- if (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP)
+ if (!ion_heap_allow_heap_secure(heap->type))
continue;
if (ION_HEAP(heap->id) != heap_id)
continue;
@@ -1715,7 +2099,7 @@
mutex_lock(&dev->lock);
for (n = rb_first(&dev->heaps); n != NULL; n = rb_next(n)) {
struct ion_heap *heap = rb_entry(n, struct ion_heap, node);
- if (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP)
+ if (!ion_heap_allow_heap_secure(heap->type))
continue;
if (ION_HEAP(heap->id) != heap_id)
continue;
@@ -1734,37 +2118,14 @@
{
struct ion_device *dev = s->private;
struct rb_node *n;
- struct rb_node *n2;
- /* mark all buffers as 1 */
seq_printf(s, "%16.s %16.s %16.s %16.s\n", "buffer", "heap", "size",
"ref cnt");
+
mutex_lock(&dev->lock);
- for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
- struct ion_buffer *buf = rb_entry(n, struct ion_buffer,
- node);
+ ion_mark_dangling_buffers_locked(dev);
- buf->marked = 1;
- }
-
- /* now see which buffers we can access */
- for (n = rb_first(&dev->clients); n; n = rb_next(n)) {
- struct ion_client *client = rb_entry(n, struct ion_client,
- node);
-
- mutex_lock(&client->lock);
- for (n2 = rb_first(&client->handles); n2; n2 = rb_next(n2)) {
- struct ion_handle *handle = rb_entry(n2,
- struct ion_handle, node);
-
- handle->buffer->marked = 0;
-
- }
- mutex_unlock(&client->lock);
-
- }
-
- /* And anyone still marked as a 1 means a leaked handle somewhere */
+ /* Anyone still marked as a 1 means a leaked handle somewhere */
for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
struct ion_buffer *buf = rb_entry(n, struct ion_buffer,
node);
@@ -1825,6 +2186,8 @@
idev->clients = RB_ROOT;
debugfs_create_file("check_leaked_fds", 0664, idev->debug_root, idev,
&debug_leak_fops);
+
+ setup_ion_leak_check(idev->debug_root);
return idev;
}
@@ -1845,8 +2208,8 @@
ret = memblock_reserve(data->heaps[i].base,
data->heaps[i].size);
if (ret)
- pr_err("memblock reserve of %x@%lx failed\n",
+ pr_err("memblock reserve of %x@%pa failed\n",
data->heaps[i].size,
- data->heaps[i].base);
+ &data->heaps[i].base);
}
}
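
Note: a recurring change in this patch is the move from %lx/%x to the %pa printk specifier for physical addresses: phys_addr_t (and ion_phys_addr_t) can be 64-bit on LPAE targets while unsigned long stays 32-bit, and %pa handles both. %pa consumes a pointer to the value, not the value itself:

	phys_addr_t base = data->heaps[i].base;

	/* %pa dereferences its argument, so pass &base rather than base */
	pr_err("memblock reserve of %x@%pa failed\n",
	       data->heaps[i].size, &base);
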
diff --git a/drivers/gpu/ion/ion_carveout_heap.c b/drivers/gpu/ion/ion_carveout_heap.c
index a808cc9..9610dfe 100644
--- a/drivers/gpu/ion/ion_carveout_heap.c
+++ b/drivers/gpu/ion/ion_carveout_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_carveout_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -23,6 +23,7 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
#include "ion_priv.h"
@@ -111,26 +112,13 @@
struct sg_table *ion_carveout_heap_map_dma(struct ion_heap *heap,
struct ion_buffer *buffer)
{
- struct sg_table *table;
- int ret;
+ size_t chunk_size = buffer->size;
- table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
- if (!table)
- return ERR_PTR(-ENOMEM);
+ if (ION_IS_CACHED(buffer->flags))
+ chunk_size = PAGE_SIZE;
- ret = sg_alloc_table(table, 1, GFP_KERNEL);
- if (ret)
- goto err0;
-
- table->sgl->length = buffer->size;
- table->sgl->offset = 0;
- table->sgl->dma_address = buffer->priv_phys;
-
- return table;
-
-err0:
- kfree(table);
- return ERR_PTR(ret);
+ return ion_create_chunked_sg_table(buffer->priv_phys, chunk_size,
+ buffer->size);
}
void ion_carveout_heap_unmap_dma(struct ion_heap *heap,
@@ -240,25 +228,78 @@
void *vaddr, unsigned int offset, unsigned int length,
unsigned int cmd)
{
- void (*outer_cache_op)(phys_addr_t, phys_addr_t);
+ void (*outer_cache_op)(phys_addr_t, phys_addr_t) = NULL;
struct ion_carveout_heap *carveout_heap =
container_of(heap, struct ion_carveout_heap, heap);
+ unsigned int size_to_vmap, total_size;
+ int i, j;
+ void *ptr = NULL;
+ ion_phys_addr_t buff_phys = buffer->priv_phys;
- switch (cmd) {
- case ION_IOC_CLEAN_CACHES:
- dmac_clean_range(vaddr, vaddr + length);
- outer_cache_op = outer_clean_range;
- break;
- case ION_IOC_INV_CACHES:
- dmac_inv_range(vaddr, vaddr + length);
- outer_cache_op = outer_inv_range;
- break;
- case ION_IOC_CLEAN_INV_CACHES:
- dmac_flush_range(vaddr, vaddr + length);
- outer_cache_op = outer_flush_range;
- break;
- default:
- return -EINVAL;
+ if (!vaddr) {
+ /*
+ * Split the vmalloc space into smaller regions in
+ * order to clean and/or invalidate the cache.
+ */
+ size_to_vmap = ((VMALLOC_END - VMALLOC_START)/8);
+ total_size = buffer->size;
+
+ for (i = 0; i < total_size; i += size_to_vmap) {
+ size_to_vmap = min(size_to_vmap, total_size - i);
+ for (j = 0; j < 10 && size_to_vmap; ++j) {
+ ptr = ioremap(buff_phys, size_to_vmap);
+ if (ptr) {
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ dmac_clean_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_clean_range;
+ break;
+ case ION_IOC_INV_CACHES:
+ dmac_inv_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_inv_range;
+ break;
+ case ION_IOC_CLEAN_INV_CACHES:
+ dmac_flush_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_flush_range;
+ break;
+ default:
+ return -EINVAL;
+ }
+ buff_phys += size_to_vmap;
+ break;
+ } else {
+ size_to_vmap >>= 1;
+ }
+ }
+ if (!ptr) {
+ pr_err("Couldn't io-remap the memory\n");
+ return -EINVAL;
+ }
+ iounmap(ptr);
+ }
+ } else {
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ dmac_clean_range(vaddr, vaddr + length);
+ outer_cache_op = outer_clean_range;
+ break;
+ case ION_IOC_INV_CACHES:
+ dmac_inv_range(vaddr, vaddr + length);
+ outer_cache_op = outer_inv_range;
+ break;
+ case ION_IOC_CLEAN_INV_CACHES:
+ dmac_flush_range(vaddr, vaddr + length);
+ outer_cache_op = outer_flush_range;
+ break;
+ default:
+ return -EINVAL;
+ }
}
if (carveout_heap->has_outer_cache) {
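
Note: when there is no kernel vaddr, the cache-op path above walks the buffer in windows carved out of the vmalloc hole, shrinking the window whenever ioremap() fails. The simplified shape of that loop (error handling trimmed, per-cmd dispatch elided):

	unsigned int win = (VMALLOC_END - VMALLOC_START) / 8;
	unsigned int total = buffer->size;
	ion_phys_addr_t phys = buffer->priv_phys;
	unsigned int done;
	void *ptr;
	int try;

	for (done = 0; done < total; done += win) {
		win = min(win, total - done);
		for (try = 0, ptr = NULL; try < 10 && win; try++) {
			ptr = ioremap(phys, win);
			if (ptr)
				break;
			win >>= 1;	/* retry with a smaller window */
		}
		if (!ptr)
			return -EINVAL;
		/* dmac_{clean,inv,flush}_range(ptr, ptr + win), per cmd */
		iounmap(ptr);
		phys += win;
	}
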
@@ -296,8 +337,11 @@
const char *client_name = "(null)";
if (last_end < data->addr) {
- seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
- "FREE", last_end, data->addr-1,
+ phys_addr_t da;
+
+ da = data->addr-1;
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ "FREE", &last_end, &da,
data->addr-last_end,
data->addr-last_end);
}
@@ -305,9 +349,9 @@
if (data->client_name)
client_name = data->client_name;
- seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
- client_name, data->addr,
- data->addr_end,
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ client_name, &data->addr,
+ &data->addr_end,
data->size, data->size);
last_end = data->addr_end+1;
}
@@ -357,7 +401,7 @@
goto out1;
}
- sglist = kmalloc(sizeof(*sglist), GFP_KERNEL);
+ sglist = vmalloc(sizeof(*sglist));
if (!sglist)
goto out1;
@@ -376,18 +420,19 @@
if (extra) {
unsigned long extra_iova_addr = data->iova_addr + buffer->size;
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra,
- SZ_4K, prot);
+ unsigned long phys_addr = sg_phys(sglist);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
- kfree(sglist);
+ vfree(sglist);
return ret;
out2:
iommu_unmap_range(domain, data->iova_addr, buffer->size);
out1:
- kfree(sglist);
+ vfree(sglist);
msm_free_iova_address(data->iova_addr, domain_num, partition_num,
data->mapped_size);
diff --git a/drivers/gpu/ion/ion_cma_heap.c b/drivers/gpu/ion/ion_cma_heap.c
index bef6b6f..4f12e38 100644
--- a/drivers/gpu/ion/ion_cma_heap.c
+++ b/drivers/gpu/ion/ion_cma_heap.c
@@ -127,8 +127,8 @@
struct device *dev = heap->priv;
struct ion_cma_buffer_info *info = buffer->priv_virt;
- dev_dbg(dev, "Return buffer %p physical address 0x%x\n", buffer,
- info->handle);
+ dev_dbg(dev, "Return buffer %p physical address 0x%pa\n", buffer,
+ &info->handle);
*addr = info->handle;
*len = buffer->size;
@@ -228,8 +228,9 @@
extra_iova_addr = data->iova_addr + buffer->size;
if (extra) {
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
- prot);
+ unsigned long phys_addr = sg_phys(table->sgl);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
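
Note: msm_iommu_map_extra() gained a physical-address parameter, so the padding mapped past the end of an over-sized IOVA range is now explicitly backed by a caller-supplied address. The callers updated in this patch pass the buffer's backing address (sg_phys() of the first scatterlist entry, or the heap base for the full-heap 2x mapping):

	unsigned long phys_addr = sg_phys(table->sgl);

	/* map `extra' bytes of padding after the buffer, backed by
	 * phys_addr, in SZ_4K steps */
	ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
				  extra, SZ_4K, prot);
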
@@ -280,15 +281,30 @@
switch (cmd) {
case ION_IOC_CLEAN_CACHES:
- dmac_clean_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ else
+ dmac_clean_range(vaddr, vaddr + length);
outer_cache_op = outer_clean_range;
break;
case ION_IOC_INV_CACHES:
- dmac_inv_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ else
+ dmac_inv_range(vaddr, vaddr + length);
outer_cache_op = outer_inv_range;
break;
case ION_IOC_CLEAN_INV_CACHES:
- dmac_flush_range(vaddr, vaddr + length);
+ if (!vaddr) {
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ } else {
+ dmac_flush_range(vaddr, vaddr + length);
+ }
outer_cache_op = outer_flush_range;
break;
default:
@@ -304,6 +320,35 @@
return 0;
}
+static int ion_cma_print_debug(struct ion_heap *heap, struct seq_file *s,
+ const struct rb_root *mem_map)
+{
+ if (mem_map) {
+ struct rb_node *n;
+
+ seq_printf(s, "\nMemory Map\n");
+ seq_printf(s, "%16.s %14.s %14.s %14.s\n",
+ "client", "start address", "end address",
+ "size (hex)");
+
+ for (n = rb_first(mem_map); n; n = rb_next(n)) {
+ struct mem_map_data *data =
+ rb_entry(n, struct mem_map_data, node);
+ const char *client_name = "(null)";
+
+
+ if (data->client_name)
+ client_name = data->client_name;
+
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ client_name, &data->addr,
+ &data->addr_end,
+ data->size, data->size);
+ }
+ }
+ return 0;
+}
+
static struct ion_heap_ops ion_cma_ops = {
.allocate = ion_cma_allocate,
.free = ion_cma_free,
@@ -316,6 +361,7 @@
.map_iommu = ion_cma_map_iommu,
.unmap_iommu = ion_cma_unmap_iommu,
.cache_op = ion_cma_cache_ops,
+ .print_debug = ion_cma_print_debug,
};
struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *data)
diff --git a/drivers/gpu/ion/ion_cma_secure_heap.c b/drivers/gpu/ion/ion_cma_secure_heap.c
new file mode 100644
index 0000000..0fbcfbf
--- /dev/null
+++ b/drivers/gpu/ion/ion_cma_secure_heap.c
@@ -0,0 +1,386 @@
+/*
+ * drivers/gpu/ion/ion_secure_cma_heap.c
+ *
+ * Copyright (C) Linaro 2012
+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/ion.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <linux/msm_ion.h>
+#include <mach/iommu_domains.h>
+
+#include <asm/cacheflush.h>
+
+/* for ion_heap_ops structure */
+#include "ion_priv.h"
+#include "msm/ion_cp_common.h"
+
+#define ION_CMA_ALLOCATE_FAILED NULL
+
+struct ion_secure_cma_buffer_info {
+ /*
+ * This needs to come first for compatibility with the secure buffer API
+ */
+ struct ion_cp_buffer secure;
+ void *cpu_addr;
+ dma_addr_t handle;
+ struct sg_table *table;
+ bool is_cached;
+};
+
+static int cma_heap_has_outer_cache;
+/*
+ * Create scatter-list for the already allocated DMA buffer.
+ * This function could be replaced by dma_common_get_sgtable
+ * as soon as it becomes available.
+ */
+int ion_secure_cma_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t handle, size_t size)
+{
+ struct page *page = virt_to_page(cpu_addr);
+ int ret;
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (unlikely(ret))
+ return ret;
+
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ sg_dma_address(sgt->sgl) = handle;
+ return 0;
+}
+
+/* ION CMA heap operations functions */
+static struct ion_secure_cma_buffer_info *__ion_secure_cma_allocate(
+ struct ion_heap *heap, struct ion_buffer *buffer,
+ unsigned long len, unsigned long align,
+ unsigned long flags)
+{
+ struct device *dev = heap->priv;
+ struct ion_secure_cma_buffer_info *info;
+ DEFINE_DMA_ATTRS(attrs);
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+
+ dev_dbg(dev, "Request buffer allocation len %ld\n", len);
+
+ info = kzalloc(sizeof(struct ion_secure_cma_buffer_info), GFP_KERNEL);
+ if (!info) {
+ dev_err(dev, "Can't allocate buffer info\n");
+ return ION_CMA_ALLOCATE_FAILED;
+ }
+
+ info->cpu_addr = dma_alloc_attrs(dev, len, &(info->handle), 0, &attrs);
+
+ if (!info->cpu_addr) {
+ dev_err(dev, "Fail to allocate buffer\n");
+ goto err;
+ }
+
+ info->table = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!info->table) {
+ dev_err(dev, "Fail to allocate sg table\n");
+ goto err;
+ }
+
+ ion_secure_cma_get_sgtable(dev,
+ info->table, info->cpu_addr, info->handle, len);
+
+ info->secure.buffer = info->handle;
+
+ /* keep this for memory release */
+ buffer->priv_virt = info;
+ dev_dbg(dev, "Allocate buffer %p\n", buffer);
+ return info;
+
+err:
+ kfree(info);
+ return ION_CMA_ALLOCATE_FAILED;
+}
+
+static int ion_secure_cma_allocate(struct ion_heap *heap,
+ struct ion_buffer *buffer,
+ unsigned long len, unsigned long align,
+ unsigned long flags)
+{
+ unsigned long secure_allocation = flags & ION_FLAG_SECURE;
+ struct ion_secure_cma_buffer_info *buf = NULL;
+
+ if (!secure_allocation) {
+ pr_err("%s: non-secure allocation disallowed from heap %s %lx\n",
+ __func__, heap->name, flags);
+ return -ENOMEM;
+ }
+
+ if (ION_IS_CACHED(flags)) {
+ pr_err("%s: cannot allocate cached memory from secure heap %s\n",
+ __func__, heap->name);
+ return -ENOMEM;
+ }
+
+
+ buf = __ion_secure_cma_allocate(heap, buffer, len, align, flags);
+
+ if (buf) {
+ buf->secure.want_delayed_unsecure = 0;
+ atomic_set(&buf->secure.secure_cnt, 0);
+ mutex_init(&buf->secure.lock);
+ buf->secure.is_secure = 1;
+ return 0;
+ } else {
+ return -ENOMEM;
+ }
+}
+
+
+static void ion_secure_cma_free(struct ion_buffer *buffer)
+{
+ struct device *dev = buffer->heap->priv;
+ struct ion_secure_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Release buffer %p\n", buffer);
+ /* release memory */
+ dma_free_coherent(dev, buffer->size, info->cpu_addr, info->handle);
+ /* release sg table */
+ kfree(info->table);
+ kfree(info);
+}
+
+static int ion_secure_cma_phys(struct ion_heap *heap, struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct device *dev = heap->priv;
+ struct ion_secure_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Return buffer %p physical address 0x%pa\n", buffer,
+ &info->handle);
+
+ *addr = info->handle;
+ *len = buffer->size;
+
+ return 0;
+}
+
+struct sg_table *ion_secure_cma_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct ion_secure_cma_buffer_info *info = buffer->priv_virt;
+
+ return info->table;
+}
+
+void ion_secure_cma_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static int ion_secure_cma_mmap(struct ion_heap *mapper,
+ struct ion_buffer *buffer,
+ struct vm_area_struct *vma)
+{
+	pr_info("%s: mmapping from secure heap %s disallowed\n",
+ __func__, mapper->name);
+ return -EINVAL;
+}
+
+static void *ion_secure_cma_map_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ pr_info("%s: kernel mapping from secure heap %s disallowed\n",
+ __func__, heap->name);
+ return NULL;
+}
+
+static void ion_secure_cma_unmap_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+int ion_secure_cma_map_iommu(struct ion_buffer *buffer,
+ struct ion_iommu_map *data,
+ unsigned int domain_num,
+ unsigned int partition_num,
+ unsigned long align,
+ unsigned long iova_length,
+ unsigned long flags)
+{
+ int ret = 0;
+ struct iommu_domain *domain;
+ unsigned long extra;
+ unsigned long extra_iova_addr;
+ struct ion_secure_cma_buffer_info *info = buffer->priv_virt;
+ struct sg_table *table = info->table;
+ int prot = IOMMU_WRITE | IOMMU_READ;
+
+ data->mapped_size = iova_length;
+
+ if (!msm_use_iommu()) {
+ data->iova_addr = info->handle;
+ return 0;
+ }
+
+ extra = iova_length - buffer->size;
+
+ ret = msm_allocate_iova_address(domain_num, partition_num,
+ data->mapped_size, align,
+ &data->iova_addr);
+
+ if (ret)
+ goto out;
+
+ domain = msm_get_iommu_domain(domain_num);
+
+ if (!domain) {
+ ret = -EINVAL;
+ goto out1;
+ }
+
+ ret = iommu_map_range(domain, data->iova_addr, table->sgl,
+ buffer->size, prot);
+
+ if (ret) {
+ pr_err("%s: could not map %lx in domain %p\n",
+ __func__, data->iova_addr, domain);
+ goto out1;
+ }
+
+ extra_iova_addr = data->iova_addr + buffer->size;
+ if (extra) {
+ unsigned long phys_addr = sg_phys(table->sgl);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
+ if (ret)
+ goto out2;
+ }
+ return ret;
+
+out2:
+ iommu_unmap_range(domain, data->iova_addr, buffer->size);
+out1:
+ msm_free_iova_address(data->iova_addr, domain_num, partition_num,
+ data->mapped_size);
+out:
+ return ret;
+}
+
+
+void ion_secure_cma_unmap_iommu(struct ion_iommu_map *data)
+{
+ unsigned int domain_num;
+ unsigned int partition_num;
+ struct iommu_domain *domain;
+
+ if (!msm_use_iommu())
+ return;
+
+ domain_num = iommu_map_domain(data);
+ partition_num = iommu_map_partition(data);
+
+ domain = msm_get_iommu_domain(domain_num);
+
+ if (!domain) {
+ WARN(1, "Could not get domain %d. Corruption?\n", domain_num);
+ return;
+ }
+
+ iommu_unmap_range(domain, data->iova_addr, data->mapped_size);
+ msm_free_iova_address(data->iova_addr, domain_num, partition_num,
+ data->mapped_size);
+
+ return;
+}
+
+int ion_secure_cma_cache_ops(struct ion_heap *heap,
+ struct ion_buffer *buffer, void *vaddr,
+ unsigned int offset, unsigned int length,
+ unsigned int cmd)
+{
+ pr_info("%s: cache operations disallowed from secure heap %s\n",
+ __func__, heap->name);
+ return -EINVAL;
+}
+
+static int ion_secure_cma_print_debug(struct ion_heap *heap, struct seq_file *s,
+ const struct rb_root *mem_map)
+{
+ if (mem_map) {
+ struct rb_node *n;
+
+ seq_printf(s, "\nMemory Map\n");
+ seq_printf(s, "%16.s %14.s %14.s %14.s\n",
+ "client", "start address", "end address",
+ "size (hex)");
+
+ for (n = rb_first(mem_map); n; n = rb_next(n)) {
+ struct mem_map_data *data =
+ rb_entry(n, struct mem_map_data, node);
+ const char *client_name = "(null)";
+
+
+ if (data->client_name)
+ client_name = data->client_name;
+
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ client_name, &data->addr,
+ &data->addr_end,
+ data->size, data->size);
+ }
+ }
+ return 0;
+}
+
+static struct ion_heap_ops ion_secure_cma_ops = {
+ .allocate = ion_secure_cma_allocate,
+ .free = ion_secure_cma_free,
+ .map_dma = ion_secure_cma_heap_map_dma,
+ .unmap_dma = ion_secure_cma_heap_unmap_dma,
+ .phys = ion_secure_cma_phys,
+ .map_user = ion_secure_cma_mmap,
+ .map_kernel = ion_secure_cma_map_kernel,
+ .unmap_kernel = ion_secure_cma_unmap_kernel,
+ .map_iommu = ion_secure_cma_map_iommu,
+ .unmap_iommu = ion_secure_cma_unmap_iommu,
+ .cache_op = ion_secure_cma_cache_ops,
+ .print_debug = ion_secure_cma_print_debug,
+ .secure_buffer = ion_cp_secure_buffer,
+ .unsecure_buffer = ion_cp_unsecure_buffer,
+};
+
+struct ion_heap *ion_secure_cma_heap_create(struct ion_platform_heap *data)
+{
+ struct ion_heap *heap;
+
+ heap = kzalloc(sizeof(struct ion_heap), GFP_KERNEL);
+
+ if (!heap)
+ return ERR_PTR(-ENOMEM);
+
+ heap->ops = &ion_secure_cma_ops;
+	/* set the device as the heap's private data; later it is
+	 * used to make the link with the reserved CMA memory */
+ heap->priv = data->priv;
+ heap->type = ION_HEAP_TYPE_SECURE_DMA;
+ cma_heap_has_outer_cache = data->has_outer_cache;
+ return heap;
+}
+
+void ion_secure_cma_heap_destroy(struct ion_heap *heap)
+{
+ kfree(heap);
+}
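
Note: the secure CMA heap refuses cached and non-secure requests up front, so clients must opt in explicitly when allocating. A minimal allocation sketch (the heap id is hypothetical; ion_alloc()'s msm-3.4 signature is assumed):

	struct ion_handle *handle;

	/* must be uncached and marked secure, or the heap returns -ENOMEM */
	handle = ion_alloc(client, len, SZ_1M,
			   ION_HEAP(ION_CP_MM_HEAP_ID),	/* hypothetical id */
			   ION_FLAG_SECURE);
	if (IS_ERR_OR_NULL(handle))
		return PTR_ERR(handle);
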
diff --git a/drivers/gpu/ion/ion_cp_heap.c b/drivers/gpu/ion/ion_cp_heap.c
index 41ff28d..88addab 100644
--- a/drivers/gpu/ion/ion_cp_heap.c
+++ b/drivers/gpu/ion/ion_cp_heap.c
@@ -23,11 +23,12 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
+#include <linux/vmalloc.h>
#include <linux/memory_alloc.h>
#include <linux/seq_file.h>
-#include <linux/fmem.h>
#include <linux/iommu.h>
#include <linux/dma-mapping.h>
+#include <trace/events/kmem.h>
#include <asm/mach/map.h>
@@ -68,8 +69,6 @@
* user space.
* @iommu_iova: saved iova when mapping full heap at once.
* @iommu_partition: partition used to map full heap.
- * @reusable: indicates if the memory should be reused via fmem.
- * @reserved_vrange: reserved virtual address range for use with fmem
* @iommu_map_all: Indicates whether we should map whole heap into IOMMU.
* @iommu_2x_map_domain: Indicates the domain to use for overmapping.
* @has_outer_cache: set to 1 if outer cache is used, 0 otherwise.
@@ -93,7 +92,6 @@
unsigned long umap_count;
unsigned long iommu_iova[MAX_DOMAINS];
unsigned long iommu_partition[MAX_DOMAINS];
- int reusable;
void *reserved_vrange;
int iommu_map_all;
int iommu_2x_map_domain;
@@ -103,7 +101,7 @@
size_t heap_size;
dma_addr_t handle;
int cma;
- int disallow_non_secure_allocation;
+ int allow_non_secure_allocation;
};
enum {
@@ -111,17 +109,8 @@
HEAP_PROTECTED = 1,
};
-#define DMA_ALLOC_RETRIES 5
+#define DMA_ALLOC_TRIES 5
-static int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
- unsigned int permission_type, int version,
- void *data);
-
-static int ion_cp_unprotect_mem(unsigned int phy_base, unsigned int size,
- unsigned int permission_type, int version,
- void *data);
-
-#if 0
static int allocate_heap_memory(struct ion_heap *heap)
{
struct device *dev = heap->priv;
@@ -136,14 +125,16 @@
if (cp_heap->cpu_addr)
return 0;
- while (!cp_heap->cpu_addr && (++tries < DMA_ALLOC_RETRIES)) {
+ while (!cp_heap->cpu_addr && (++tries < DMA_ALLOC_TRIES)) {
cp_heap->cpu_addr = dma_alloc_attrs(dev,
cp_heap->heap_size,
&(cp_heap->handle),
0,
&attrs);
- if (!cp_heap->cpu_addr)
+ if (!cp_heap->cpu_addr) {
+ trace_ion_cp_alloc_retry(tries);
msleep(20);
+ }
}
if (!cp_heap->cpu_addr)
@@ -170,7 +161,7 @@
out:
return ION_CP_ALLOCATE_FAIL;
}
-#endif
+
static void free_heap_memory(struct ion_heap *heap)
{
struct device *dev = heap->priv;
@@ -197,19 +188,12 @@
return cp_heap->kmap_cached_count + cp_heap->kmap_uncached_count;
}
-#if 0
static int ion_on_first_alloc(struct ion_heap *heap)
{
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
int ret_value;
- if (cp_heap->reusable) {
- ret_value = fmem_set_state(FMEM_C_STATE);
- if (ret_value)
- return 1;
- }
-
if (cp_heap->cma) {
ret_value = allocate_heap_memory(heap);
if (ret_value)
@@ -217,18 +201,12 @@
}
return 0;
}
-#endif
static void ion_on_last_free(struct ion_heap *heap)
{
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
- if (cp_heap->reusable)
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state\n",
- __func__);
-
if (cp_heap->cma)
free_heap_memory(heap);
}
@@ -246,31 +224,6 @@
if (atomic_inc_return(&cp_heap->protect_cnt) == 1) {
/* Make sure we are in C state when the heap is protected. */
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- ret_value = fmem_set_state(FMEM_C_STATE);
- if (ret_value)
- goto out;
- }
-
- ret_value = ion_cp_protect_mem(cp_heap->secure_base,
- cp_heap->secure_size, cp_heap->permission_type,
- version, data);
- if (ret_value) {
- pr_err("Failed to protect memory for heap %s - "
- "error code: %d\n", heap->name, ret_value);
-
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state\n",
- __func__);
- }
- atomic_dec(&cp_heap->protect_cnt);
- } else {
- cp_heap->heap_protected = HEAP_PROTECTED;
- pr_debug("Protected heap %s @ 0x%lx\n",
- heap->name, cp_heap->base);
- }
-#if 0
if (!cp_heap->allocated_bytes)
if (ion_on_first_alloc(heap))
goto out;
@@ -288,10 +241,9 @@
atomic_dec(&cp_heap->protect_cnt);
} else {
cp_heap->heap_protected = HEAP_PROTECTED;
- pr_debug("Protected heap %s @ 0x%lx\n",
- heap->name, cp_heap->base);
+ pr_debug("Protected heap %s @ 0x%pa\n",
+ heap->name, &cp_heap->base);
}
-#endif
}
out:
pr_debug("%s: protect count is %d\n", __func__,
@@ -337,8 +289,8 @@
unsigned long flags)
{
unsigned long offset;
- unsigned long secure_allocation = flags & ION_SECURE;
- unsigned long force_contig = flags & ION_FORCE_CONTIGUOUS;
+ unsigned long secure_allocation = flags & ION_FLAG_SECURE;
+ unsigned long force_contig = flags & ION_FLAG_FORCE_CONTIGUOUS;
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
@@ -352,31 +304,25 @@
}
if (!force_contig && !secure_allocation &&
- cp_heap->disallow_non_secure_allocation) {
+ !cp_heap->allow_non_secure_allocation) {
mutex_unlock(&cp_heap->lock);
pr_debug("%s: non-secure allocation disallowed from this heap\n",
__func__);
return ION_CP_ALLOCATE_FAIL;
}
- if (secure_allocation &&
- (cp_heap->umap_count > 0 || cp_heap->kmap_cached_count > 0)) {
- mutex_unlock(&cp_heap->lock);
- pr_err("ION cannot allocate secure memory from heap with "
- "outstanding mappings: User space: %lu, kernel space "
- "(cached): %lu\n", cp_heap->umap_count,
- cp_heap->kmap_cached_count);
- return ION_CP_ALLOCATE_FAIL;
+ /*
+ * The check above already checked for non-secure allocations when the
+ * heap is protected. HEAP_PROTECTED implies that this must be a secure
+ * allocation. If the heap is protected and there are userspace or
+ * cached kernel mappings, something has gone wrong in the security
+ * model.
+ */
+ if (cp_heap->heap_protected == HEAP_PROTECTED) {
+ BUG_ON(cp_heap->umap_count != 0);
+ BUG_ON(cp_heap->kmap_cached_count != 0);
}
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- if (fmem_set_state(FMEM_C_STATE) != 0) {
- mutex_unlock(&cp_heap->lock);
- return ION_RESERVED_ALLOCATE_FAIL;
- }
- }
-
-#if 0
/*
* if this is the first reusable allocation, transition
* the heap
@@ -387,7 +333,6 @@
return ION_RESERVED_ALLOCATE_FAIL;
}
-#endif
cp_heap->allocated_bytes += size;
mutex_unlock(&cp_heap->lock);
@@ -482,7 +427,9 @@
struct ion_buffer *buffer,
ion_phys_addr_t *addr, size_t *len)
{
- *addr = buffer->priv_phys;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+
+ *addr = buf->buffer;
*len = buffer->size;
return 0;
}
@@ -492,39 +439,58 @@
unsigned long size, unsigned long align,
unsigned long flags)
{
- buffer->priv_phys = ion_cp_allocate(heap, size, align, flags);
- return buffer->priv_phys == ION_CP_ALLOCATE_FAIL ? -ENOMEM : 0;
+ struct ion_cp_buffer *buf;
+ phys_addr_t addr;
+
+ /*
+ * we never want Ion to fault pages in for us with this
+ * heap. We want to set up the mappings ourselves in .map_user
+ */
+ flags |= ION_FLAG_CACHED_NEEDS_SYNC;
+
+ buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+ if (!buf)
+ return ION_CP_ALLOCATE_FAIL;
+
+ addr = ion_cp_allocate(heap, size, align, flags);
+ if (addr == ION_CP_ALLOCATE_FAIL)
+ return -ENOMEM;
+
+ buf->buffer = addr;
+ buf->want_delayed_unsecure = 0;
+ atomic_set(&buf->secure_cnt, 0);
+ mutex_init(&buf->lock);
+ buf->is_secure = flags & ION_FLAG_SECURE ? 1 : 0;
+ buffer->priv_virt = buf;
+
+ return 0;
}
static void ion_cp_heap_free(struct ion_buffer *buffer)
{
struct ion_heap *heap = buffer->heap;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
- ion_cp_free(heap, buffer->priv_phys, buffer->size);
- buffer->priv_phys = ION_CP_ALLOCATE_FAIL;
+ ion_cp_free(heap, buf->buffer, buffer->size);
+ WARN_ON(atomic_read(&buf->secure_cnt));
+ WARN_ON(atomic_read(&buf->map_cnt));
+ kfree(buf);
+
+ buffer->priv_virt = NULL;
}
struct sg_table *ion_cp_heap_create_sg_table(struct ion_buffer *buffer)
{
- struct sg_table *table;
- int ret;
+ size_t chunk_size = buffer->size;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
- table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
- if (!table)
- return ERR_PTR(-ENOMEM);
+ if (ION_IS_CACHED(buffer->flags))
+ chunk_size = PAGE_SIZE;
+ else if (buf->is_secure && IS_ALIGNED(buffer->size, SZ_1M))
+ chunk_size = SZ_1M;
- ret = sg_alloc_table(table, 1, GFP_KERNEL);
- if (ret)
- goto err0;
-
- table->sgl->length = buffer->size;
- table->sgl->offset = 0;
- table->sgl->dma_address = buffer->priv_phys;
-
- return table;
-err0:
- kfree(table);
- return ERR_PTR(ret);
+ return ion_create_chunked_sg_table(buf->buffer, chunk_size,
+ buffer->size);
}
struct sg_table *ion_cp_heap_map_dma(struct ion_heap *heap,
@@ -568,33 +534,12 @@
return ret_value;
}
-void *ion_map_fmem_buffer(struct ion_buffer *buffer, unsigned long phys_base,
- void *virt_base, unsigned long flags)
-{
- int ret;
- unsigned int offset = buffer->priv_phys - phys_base;
- unsigned long start = ((unsigned long)virt_base) + offset;
- const struct mem_type *type = ION_IS_CACHED(flags) ?
- get_mem_type(MT_DEVICE_CACHED) :
- get_mem_type(MT_DEVICE);
-
- if (phys_base > buffer->priv_phys)
- return NULL;
-
-
- ret = ioremap_pages(start, buffer->priv_phys, buffer->size, type);
-
- if (!ret)
- return (void *)start;
- else
- return NULL;
-}
-
void *ion_cp_heap_map_kernel(struct ion_heap *heap, struct ion_buffer *buffer)
{
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
void *ret_value = NULL;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
mutex_lock(&cp_heap->lock);
if ((cp_heap->heap_protected == HEAP_NOT_PROTECTED) ||
@@ -606,33 +551,35 @@
return NULL;
}
- if (cp_heap->reusable) {
- ret_value = ion_map_fmem_buffer(buffer, cp_heap->base,
- cp_heap->reserved_vrange, buffer->flags);
- } else if (cp_heap->cma) {
+ if (cp_heap->cma) {
int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
struct page **pages = vmalloc(
sizeof(struct page *) * npages);
int i;
pgprot_t pgprot;
+ if (!pages) {
+ mutex_unlock(&cp_heap->lock);
+ return ERR_PTR(-ENOMEM);
+ }
+
if (ION_IS_CACHED(buffer->flags))
pgprot = PAGE_KERNEL;
else
pgprot = pgprot_writecombine(PAGE_KERNEL);
for (i = 0; i < npages; i++) {
- pages[i] = phys_to_page(buffer->priv_phys +
+ pages[i] = phys_to_page(buf->buffer +
i * PAGE_SIZE);
}
ret_value = vmap(pages, npages, VM_IOREMAP, pgprot);
vfree(pages);
} else {
if (ION_IS_CACHED(buffer->flags))
- ret_value = ioremap_cached(buffer->priv_phys,
+ ret_value = ioremap_cached(buf->buffer,
buffer->size);
else
- ret_value = ioremap(buffer->priv_phys,
+ ret_value = ioremap(buf->buffer,
buffer->size);
}
@@ -643,6 +590,7 @@
++cp_heap->kmap_cached_count;
else
++cp_heap->kmap_uncached_count;
+ atomic_inc(&buf->map_cnt);
}
}
mutex_unlock(&cp_heap->lock);
@@ -654,10 +602,9 @@
{
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
+ struct ion_cp_buffer *buf = buffer->priv_virt;
- if (cp_heap->reusable)
- unmap_kernel_range((unsigned long)buffer->vaddr, buffer->size);
- else if (cp_heap->cma)
+ if (cp_heap->cma)
vunmap(buffer->vaddr);
else
__arm_iounmap(buffer->vaddr);
@@ -669,6 +616,8 @@
--cp_heap->kmap_cached_count;
else
--cp_heap->kmap_uncached_count;
+
+ atomic_dec(&buf->map_cnt);
ion_cp_release_region(cp_heap);
mutex_unlock(&cp_heap->lock);
@@ -681,9 +630,10 @@
int ret_value = -EAGAIN;
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
+ struct ion_cp_buffer *buf = buffer->priv_virt;
mutex_lock(&cp_heap->lock);
- if (cp_heap->heap_protected == HEAP_NOT_PROTECTED) {
+ if (cp_heap->heap_protected == HEAP_NOT_PROTECTED && !buf->is_secure) {
if (ion_cp_request_region(cp_heap)) {
mutex_unlock(&cp_heap->lock);
return -EINVAL;
@@ -694,14 +644,17 @@
vma->vm_page_prot);
ret_value = remap_pfn_range(vma, vma->vm_start,
- __phys_to_pfn(buffer->priv_phys) + vma->vm_pgoff,
+ __phys_to_pfn(buf->buffer) + vma->vm_pgoff,
vma->vm_end - vma->vm_start,
vma->vm_page_prot);
- if (ret_value)
+ if (ret_value) {
ion_cp_release_region(cp_heap);
- else
+ } else {
+ atomic_inc(&buf->map_cnt);
++cp_heap->umap_count;
+ }
+
}
mutex_unlock(&cp_heap->lock);
return ret_value;
@@ -712,9 +665,11 @@
{
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
+ struct ion_cp_buffer *buf = buffer->priv_virt;
mutex_lock(&cp_heap->lock);
--cp_heap->umap_count;
+ atomic_dec(&buf->map_cnt);
ion_cp_release_region(cp_heap);
mutex_unlock(&cp_heap->lock);
}
@@ -723,29 +678,82 @@
void *vaddr, unsigned int offset, unsigned int length,
unsigned int cmd)
{
- void (*outer_cache_op)(phys_addr_t, phys_addr_t);
+ void (*outer_cache_op)(phys_addr_t, phys_addr_t) = NULL;
struct ion_cp_heap *cp_heap =
- container_of(heap, struct ion_cp_heap, heap);
+ container_of(heap, struct ion_cp_heap, heap);
+ unsigned int size_to_vmap, total_size;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+ int i, j;
+ void *ptr = NULL;
+	ion_phys_addr_t buff_phys = buf->buffer;
- switch (cmd) {
- case ION_IOC_CLEAN_CACHES:
- dmac_clean_range(vaddr, vaddr + length);
- outer_cache_op = outer_clean_range;
- break;
- case ION_IOC_INV_CACHES:
- dmac_inv_range(vaddr, vaddr + length);
- outer_cache_op = outer_inv_range;
- break;
- case ION_IOC_CLEAN_INV_CACHES:
- dmac_flush_range(vaddr, vaddr + length);
- outer_cache_op = outer_flush_range;
- break;
- default:
- return -EINVAL;
+ if (!vaddr) {
+ /*
+ * Split the vmalloc space into smaller regions in
+ * order to clean and/or invalidate the cache.
+ */
+ size_to_vmap = (VMALLOC_END - VMALLOC_START)/8;
+ total_size = buffer->size;
+ for (i = 0; i < total_size; i += size_to_vmap) {
+ ptr = NULL;
+ size_to_vmap = min(size_to_vmap, total_size - i);
+ for (j = 0; j < 10 && size_to_vmap; ++j) {
+ ptr = ioremap(buff_phys, size_to_vmap);
+ if (ptr) {
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ dmac_clean_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_clean_range;
+ break;
+ case ION_IOC_INV_CACHES:
+ dmac_inv_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_inv_range;
+ break;
+ case ION_IOC_CLEAN_INV_CACHES:
+ dmac_flush_range(ptr,
+ ptr + size_to_vmap);
+ outer_cache_op =
+ outer_flush_range;
+ break;
+ default:
+ iounmap(ptr);
+ return -EINVAL;
+ }
+ buff_phys += size_to_vmap;
+ break;
+ } else {
+ size_to_vmap >>= 1;
+ }
+ }
+ if (!ptr) {
+ pr_err("Couldn't io-remap the memory\n");
+ return -EINVAL;
+ }
+ iounmap(ptr);
+ }
+ } else {
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ dmac_clean_range(vaddr, vaddr + length);
+ outer_cache_op = outer_clean_range;
+ break;
+ case ION_IOC_INV_CACHES:
+ dmac_inv_range(vaddr, vaddr + length);
+ outer_cache_op = outer_inv_range;
+ break;
+ case ION_IOC_CLEAN_INV_CACHES:
+ dmac_flush_range(vaddr, vaddr + length);
+ outer_cache_op = outer_flush_range;
+ break;
+ default:
+ return -EINVAL;
+ }
}
if (cp_heap->has_outer_cache) {
- unsigned long pstart = buffer->priv_phys + offset;
+ unsigned long pstart = buf->buffer + offset;
outer_cache_op(pstart, pstart + length);
}
return 0;
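
A note on the !vaddr branch above: when the buffer has no kernel mapping, the heap walks the physical range itself, ioremapping at most one eighth of the vmalloc window at a time and halving the chunk size whenever ioremap fails, so cache maintenance works even for buffers larger than the available virtual space. A minimal sketch of that pattern (hypothetical helper name, clean-only for brevity, same ioremap/dmac_clean_range calls as above):

    static int clean_phys_range(phys_addr_t base, unsigned int size)
    {
        /* never ask for more than 1/8th of the vmalloc window at once */
        unsigned int chunk = (VMALLOC_END - VMALLOC_START) / 8;
        unsigned int done;

        for (done = 0; done < size; done += chunk) {
            void *va = NULL;
            int retry;

            chunk = min(chunk, size - done);
            /* back off to smaller mappings when virtual space is tight */
            for (retry = 0; retry < 10 && chunk; retry++) {
                va = ioremap(base + done, chunk);
                if (va)
                    break;
                chunk >>= 1;
            }
            if (!va)
                return -ENOMEM;
            dmac_clean_range(va, va + chunk);
            iounmap(va);
        }
        return 0;
    }

Note how `done += chunk` picks up the possibly halved chunk size, so the walk never skips bytes it failed to map.
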
@@ -775,7 +783,6 @@
seq_printf(s, "umapping count: %lx\n", umap_count);
seq_printf(s, "kmapping count: %lx\n", kmap_count);
seq_printf(s, "heap protected: %s\n", heap_protected ? "Yes" : "No");
- seq_printf(s, "reusable: %s\n", cp_heap->reusable ? "Yes" : "No");
if (mem_map) {
unsigned long base = cp_heap->base;
@@ -795,8 +802,11 @@
const char *client_name = "(null)";
if (last_end < data->addr) {
- seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
- "FREE", last_end, data->addr-1,
+ phys_addr_t da;
+
+ da = data->addr-1;
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ "FREE", &last_end, &da,
data->addr-last_end,
data->addr-last_end);
}
@@ -804,9 +814,9 @@
if (data->client_name)
client_name = data->client_name;
- seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
- client_name, data->addr,
- data->addr_end,
+ seq_printf(s, "%16.s %14pa %14pa %14lu (%lx)\n",
+ client_name, &data->addr,
+ &data->addr_end,
data->size, data->size);
last_end = data->addr_end+1;
}
@@ -902,6 +912,7 @@
}
if (domain_num == cp_heap->iommu_2x_map_domain)
ret_value = msm_iommu_map_extra(domain, temp_iova,
+ cp_heap->base,
cp_heap->total_size,
SZ_64K, prot);
if (ret_value)
@@ -933,25 +944,26 @@
struct ion_cp_heap *cp_heap =
container_of(buffer->heap, struct ion_cp_heap, heap);
int prot = IOMMU_WRITE | IOMMU_READ;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
prot |= ION_IS_CACHED(flags) ? IOMMU_CACHE : 0;
data->mapped_size = iova_length;
if (!msm_use_iommu()) {
- data->iova_addr = buffer->priv_phys;
+ data->iova_addr = buf->buffer;
return 0;
}
if (cp_heap->iommu_iova[domain_num]) {
/* Already mapped. */
- unsigned long offset = buffer->priv_phys - cp_heap->base;
+ unsigned long offset = buf->buffer - cp_heap->base;
data->iova_addr = cp_heap->iommu_iova[domain_num] + offset;
return 0;
} else if (cp_heap->iommu_map_all) {
ret = iommu_map_all(domain_num, cp_heap, partition_num, prot);
if (!ret) {
unsigned long offset =
- buffer->priv_phys - cp_heap->base;
+ buf->buffer - cp_heap->base;
data->iova_addr =
cp_heap->iommu_iova[domain_num] + offset;
cp_heap->iommu_partition[domain_num] = partition_num;
@@ -994,8 +1006,9 @@
if (extra) {
unsigned long extra_iova_addr = data->iova_addr + buffer->size;
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra,
- SZ_4K, prot);
+ unsigned long phys_addr = sg_phys(buffer->sg_table->sgl);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
@@ -1061,6 +1074,8 @@
.unsecure_heap = ion_cp_unsecure_heap,
.map_iommu = ion_cp_heap_map_iommu,
.unmap_iommu = ion_cp_heap_unmap_iommu,
+ .secure_buffer = ion_cp_secure_buffer,
+ .unsecure_buffer = ion_cp_unsecure_buffer,
};
struct ion_heap *ion_cp_heap_create(struct ion_platform_heap *heap_data)
@@ -1092,8 +1107,6 @@
if (heap_data->extra_data) {
struct ion_cp_heap_pdata *extra_data =
heap_data->extra_data;
- cp_heap->reusable = extra_data->reusable;
- cp_heap->reserved_vrange = extra_data->virt_addr;
cp_heap->permission_type = extra_data->permission_type;
if (extra_data->secure_size) {
cp_heap->secure_base = extra_data->secure_base;
@@ -1112,8 +1125,8 @@
cp_heap->iommu_2x_map_domain =
extra_data->iommu_2x_map_domain;
cp_heap->cma = extra_data->is_cma;
- cp_heap->disallow_non_secure_allocation =
- extra_data->no_nonsecure_alloc;
+ cp_heap->allow_non_secure_allocation =
+ extra_data->allow_nonsecure_alloc;
}
@@ -1163,108 +1176,4 @@
*size = cp_heap->total_size;
}
-/* SCM related code for locking down memory for content protection */
-#define SCM_CP_LOCK_CMD_ID 0x1
-#define SCM_CP_PROTECT 0x1
-#define SCM_CP_UNPROTECT 0x0
-
-struct cp_lock_msg {
- unsigned int start;
- unsigned int end;
- unsigned int permission_type;
- unsigned char lock;
-} __attribute__ ((__packed__));
-
-static int ion_cp_protect_mem_v1(unsigned int phy_base, unsigned int size,
- unsigned int permission_type)
-{
- struct cp_lock_msg cmd;
- cmd.start = phy_base;
- cmd.end = phy_base + size;
- cmd.permission_type = permission_type;
- cmd.lock = SCM_CP_PROTECT;
-
- return scm_call(SCM_SVC_CP, SCM_CP_LOCK_CMD_ID,
- &cmd, sizeof(cmd), NULL, 0);
-}
-
-static int ion_cp_unprotect_mem_v1(unsigned int phy_base, unsigned int size,
- unsigned int permission_type)
-{
- struct cp_lock_msg cmd;
- cmd.start = phy_base;
- cmd.end = phy_base + size;
- cmd.permission_type = permission_type;
- cmd.lock = SCM_CP_UNPROTECT;
-
- return scm_call(SCM_SVC_CP, SCM_CP_LOCK_CMD_ID,
- &cmd, sizeof(cmd), NULL, 0);
-}
-
-#define V2_CHUNK_SIZE SZ_1M
-
-static int ion_cp_change_mem_v2(unsigned int phy_base, unsigned int size,
- void *data, int lock)
-{
- enum cp_mem_usage usage = (enum cp_mem_usage) data;
- unsigned long *chunk_list;
- int nchunks;
- int ret;
- int i;
-
- if (usage < 0 || usage >= MAX_USAGE)
- return -EINVAL;
-
- if (!IS_ALIGNED(size, V2_CHUNK_SIZE)) {
- pr_err("%s: heap size is not aligned to %x\n",
- __func__, V2_CHUNK_SIZE);
- return -EINVAL;
- }
-
- nchunks = size / V2_CHUNK_SIZE;
-
- chunk_list = allocate_contiguous_ebi(sizeof(unsigned long)*nchunks,
- SZ_4K, 0);
- if (!chunk_list)
- return -ENOMEM;
-
- for (i = 0; i < nchunks; i++)
- chunk_list[i] = phy_base + i * V2_CHUNK_SIZE;
-
- ret = ion_cp_change_chunks_state(memory_pool_node_paddr(chunk_list),
- nchunks, V2_CHUNK_SIZE, usage, lock);
-
- free_contiguous_memory(chunk_list);
- return ret;
-}
-
-static int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
- unsigned int permission_type, int version,
- void *data)
-{
- switch (version) {
- case ION_CP_V1:
- return ion_cp_protect_mem_v1(phy_base, size, permission_type);
- case ION_CP_V2:
- return ion_cp_change_mem_v2(phy_base, size, data,
- SCM_CP_PROTECT);
- default:
- return -EINVAL;
- }
-}
-
-static int ion_cp_unprotect_mem(unsigned int phy_base, unsigned int size,
- unsigned int permission_type, int version,
- void *data)
-{
- switch (version) {
- case ION_CP_V1:
- return ion_cp_unprotect_mem_v1(phy_base, size, permission_type);
- case ION_CP_V2:
- return ion_cp_change_mem_v2(phy_base, size, data,
- SCM_CP_UNPROTECT);
- default:
- return -EINVAL;
- }
-}
diff --git a/drivers/gpu/ion/ion_heap.c b/drivers/gpu/ion/ion_heap.c
index 98c1a8c..510b9ce 100644
--- a/drivers/gpu/ion/ion_heap.c
+++ b/drivers/gpu/ion/ion_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -18,13 +18,12 @@
#include <linux/err.h>
#include <linux/ion.h>
#include "ion_priv.h"
-#include <linux/msm_ion.h>
struct ion_heap *ion_heap_create(struct ion_platform_heap *heap_data)
{
struct ion_heap *heap = NULL;
- switch ((int) heap_data->type) {
+ switch (heap_data->type) {
case ION_HEAP_TYPE_SYSTEM_CONTIG:
heap = ion_system_contig_heap_create(heap_data);
break;
@@ -34,17 +33,6 @@
case ION_HEAP_TYPE_CARVEOUT:
heap = ion_carveout_heap_create(heap_data);
break;
- case ION_HEAP_TYPE_IOMMU:
- heap = ion_iommu_heap_create(heap_data);
- break;
- case ION_HEAP_TYPE_CP:
- heap = ion_cp_heap_create(heap_data);
- break;
-#ifdef CONFIG_CMA
- case ION_HEAP_TYPE_DMA:
- heap = ion_cma_heap_create(heap_data);
- break;
-#endif
default:
pr_err("%s: Invalid heap type %d\n", __func__,
heap_data->type);
@@ -52,9 +40,9 @@
}
if (IS_ERR_OR_NULL(heap)) {
- pr_err("%s: error creating heap %s type %d base %lu size %u\n",
+ pr_err("%s: error creating heap %s type %d base %pa size %u\n",
__func__, heap_data->name, heap_data->type,
- heap_data->base, heap_data->size);
+ &heap_data->base, heap_data->size);
return ERR_PTR(-EINVAL);
}
@@ -69,7 +57,7 @@
if (!heap)
return;
- switch ((int) heap->type) {
+ switch (heap->type) {
case ION_HEAP_TYPE_SYSTEM_CONTIG:
ion_system_contig_heap_destroy(heap);
break;
@@ -79,17 +67,6 @@
case ION_HEAP_TYPE_CARVEOUT:
ion_carveout_heap_destroy(heap);
break;
- case ION_HEAP_TYPE_IOMMU:
- ion_iommu_heap_destroy(heap);
- break;
- case ION_HEAP_TYPE_CP:
- ion_cp_heap_destroy(heap);
- break;
-#ifdef CONFIG_CMA
- case ION_HEAP_TYPE_DMA:
- ion_cma_heap_destroy(heap);
- break;
-#endif
default:
pr_err("%s: Invalid heap type %d\n", __func__,
heap->type);
diff --git a/drivers/gpu/ion/ion_iommu_heap.c b/drivers/gpu/ion/ion_iommu_heap.c
index 3a32390..512ebf3 100644
--- a/drivers/gpu/ion/ion_iommu_heap.c
+++ b/drivers/gpu/ion/ion_iommu_heap.c
@@ -48,8 +48,6 @@
#define MAX_VMAP_RETRIES 10
-atomic_t v = ATOMIC_INIT(0);
-
static const unsigned int orders[] = {8, 4, 0};
static const int num_orders = ARRAY_SIZE(orders);
@@ -72,13 +70,21 @@
int i;
for (i = 0; i < num_orders; i++) {
+ gfp_t gfp;
if (size < order_to_size(orders[i]))
continue;
if (max_order < orders[i])
continue;
- page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM | __GFP_COMP,
- orders[i]);
+ gfp = __GFP_HIGHMEM;
+
+ if (orders[i]) {
+ gfp |= __GFP_COMP | __GFP_NORETRY |
+ __GFP_NO_KSWAPD | __GFP_NOWARN;
+ } else {
+ gfp |= GFP_KERNEL;
+ }
+ page = alloc_pages(gfp, orders[i]);
if (!page)
continue;
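
The gfp rework above makes the high orders strictly opportunistic: order-8 and order-4 attempts may not retry, wake kswapd, or warn on failure, while the order-0 fallback keeps full GFP_KERNEL semantics and is allowed to block in reclaim until it succeeds. Condensed into a standalone sketch (hypothetical helper, same policy as the loop above):

    static struct page *alloc_best_effort(unsigned int order)
    {
        gfp_t gfp = __GFP_HIGHMEM;

        if (order)
            /* cheap attempt: fail fast rather than stall in reclaim */
            gfp |= __GFP_COMP | __GFP_NORETRY |
                   __GFP_NO_KSWAPD | __GFP_NOWARN;
        else
            /* last resort: single pages may block until they succeed */
            gfp |= GFP_KERNEL;

        return alloc_pages(gfp, order);
    }
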
@@ -107,7 +113,7 @@
void *ptr = NULL;
unsigned int npages_to_vmap, total_pages, num_large_pages = 0;
long size_remaining = PAGE_ALIGN(size);
- unsigned int max_order = orders[0];
+ unsigned int max_order = ION_IS_CACHED(flags) ? 0 : orders[0];
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
@@ -199,9 +205,6 @@
DMA_BIDIRECTIONAL);
buffer->priv_virt = data;
-
- atomic_add(data->size, &v);
-
return 0;
} else {
@@ -246,19 +249,10 @@
sg_free_table(table);
kfree(table);
table = 0;
-
- atomic_sub(data->size, &v);
-
kfree(data->pages);
kfree(data);
}
-int ion_iommu_heap_dump_size(void)
-{
- int ret = atomic_read(&v);
- return ret;
-}
-
void *ion_iommu_heap_map_kernel(struct ion_heap *heap,
struct ion_buffer *buffer)
{
@@ -269,7 +263,7 @@
return NULL;
if (!ION_IS_CACHED(buffer->flags))
- page_prot = pgprot_noncached(page_prot);
+ page_prot = pgprot_writecombine(page_prot);
buffer->vaddr = vmap(data->pages, data->nrpages, VM_IOREMAP, page_prot);
@@ -340,6 +334,14 @@
data->mapped_size = iova_length;
extra = iova_length - buffer->size;
+ /* Use the biggest alignment to allow bigger IOMMU mappings.
+ * Use the first entry since the first entry will always be the
+ * biggest entry. To take advantage of bigger mapping sizes both the
+ * VA and PA addresses have to be aligned to the biggest size.
+ */
+ if (buffer->sg_table->sgl->length > align)
+ align = buffer->sg_table->sgl->length;
+
ret = msm_allocate_iova_address(domain_num, partition_num,
data->mapped_size, align,
&data->iova_addr);
@@ -365,8 +367,9 @@
if (extra) {
unsigned long extra_iova_addr = data->iova_addr + buffer->size;
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
- prot);
+ unsigned long phys_addr = sg_phys(buffer->sg_table->sgl);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
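
Two related changes recur in every map_iommu path in this series: the IOVA is aligned up to the length of the first (largest) sg entry so the IOMMU can use its bigger page sizes, and msm_iommu_map_extra now takes a physical address so the padding past the end of the buffer is backed by the buffer's own first chunk instead of a dummy page chosen internally. The shared shape, roughly:

    /* align to the first (biggest) chunk to enable large IOMMU pages */
    if (buffer->sg_table->sgl->length > align)
        align = buffer->sg_table->sgl->length;

    /* back the pad region with the buffer's first physical chunk */
    if (extra) {
        unsigned long extra_iova = data->iova_addr + buffer->size;
        unsigned long phys = sg_phys(buffer->sg_table->sgl);

        ret = msm_iommu_map_extra(domain, extra_iova, phys,
                                  extra, SZ_4K, prot);
    }
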
@@ -418,15 +421,30 @@
switch (cmd) {
case ION_IOC_CLEAN_CACHES:
- dmac_clean_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ else
+ dmac_clean_range(vaddr, vaddr + length);
outer_cache_op = outer_clean_range;
break;
case ION_IOC_INV_CACHES:
- dmac_inv_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ else
+ dmac_inv_range(vaddr, vaddr + length);
outer_cache_op = outer_inv_range;
break;
case ION_IOC_CLEAN_INV_CACHES:
- dmac_flush_range(vaddr, vaddr + length);
+ if (!vaddr) {
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ } else {
+ dmac_flush_range(vaddr, vaddr + length);
+ }
outer_cache_op = outer_flush_range;
break;
default:
diff --git a/drivers/gpu/ion/ion_priv.h b/drivers/gpu/ion/ion_priv.h
index d494f7a..28ef1a5 100644
--- a/drivers/gpu/ion/ion_priv.h
+++ b/drivers/gpu/ion/ion_priv.h
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_priv.h
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -98,7 +98,8 @@
void *vaddr;
int dmap_cnt;
struct sg_table *sg_table;
- int umap_cnt;
+ unsigned long *dirty;
+ struct list_head vmas;
unsigned int iommu_map_cnt;
struct rb_root iommu_maps;
int marked;
@@ -147,6 +148,9 @@
const struct rb_root *mem_map);
int (*secure_heap)(struct ion_heap *heap, int version, void *data);
int (*unsecure_heap)(struct ion_heap *heap, int version, void *data);
+ int (*secure_buffer)(struct ion_buffer *buffer, int version,
+ void *data, int flags);
+ int (*unsecure_buffer)(struct ion_buffer *buffer, int force_unsecure);
};
/**
@@ -177,6 +181,15 @@
};
/**
+ * ion_buffer_fault_user_mappings - fault in user mappings of this buffer
+ * @buffer: buffer
+ *
+ * indicates whether userspace mappings of this buffer will be faulted
+ * in, this can affect how buffers are allocated from the heap.
+ */
+bool ion_buffer_fault_user_mappings(struct ion_buffer *buffer);
+
+/**
* struct mem_map_data - represents information about the memory map for a heap
* @node: rb node used to store in the tree of mem_map_data
* @addr: start address of memory region.
@@ -187,8 +200,8 @@
*/
struct mem_map_data {
struct rb_node node;
- unsigned long addr;
- unsigned long addr_end;
+ ion_phys_addr_t addr;
+ ion_phys_addr_t addr_end;
unsigned long size;
const char *client_name;
};
@@ -259,6 +272,9 @@
#ifdef CONFIG_CMA
struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *);
void ion_cma_heap_destroy(struct ion_heap *);
+
+struct ion_heap *ion_secure_cma_heap_create(struct ion_platform_heap *);
+void ion_secure_cma_heap_destroy(struct ion_heap *);
#endif
struct ion_heap *msm_get_contiguous_heap(void);
@@ -313,4 +329,28 @@
void ion_mem_map_show(struct ion_heap *heap);
+
+
+int ion_secure_handle(struct ion_client *client, struct ion_handle *handle,
+ int version, void *data, int flags);
+
+int ion_unsecure_handle(struct ion_client *client, struct ion_handle *handle);
+
+int ion_heap_allow_secure_allocation(enum ion_heap_type type);
+
+int ion_heap_allow_heap_secure(enum ion_heap_type type);
+
+int ion_heap_allow_handle_secure(enum ion_heap_type type);
+
+/**
+ * ion_create_chunked_sg_table - helper function to create sg table
+ * with specified chunk size
+ * @buffer_base: The starting address used for the sg dma address
+ * @chunk_size: The size of each entry in the sg table
+ * @total_size: The total size of the sg table (i.e. the sum of the
+ * entries). This will be rounded up to the nearest
+ * multiple of `chunk_size'
+ */
+struct sg_table *ion_create_chunked_sg_table(phys_addr_t buffer_base,
+ size_t chunk_size, size_t total_size);
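
Only the declaration appears in this diff; a plausible implementation — a sketch, not the body actually added elsewhere in the tree — rounds total_size up and emits one fixed-size entry per chunk starting at buffer_base:

    struct sg_table *ion_create_chunked_sg_table(phys_addr_t buffer_base,
                size_t chunk_size, size_t total_size)
    {
        struct sg_table *table;
        struct scatterlist *sg;
        int i, n = DIV_ROUND_UP(total_size, chunk_size);

        table = kzalloc(sizeof(*table), GFP_KERNEL);
        if (!table)
            return ERR_PTR(-ENOMEM);
        if (sg_alloc_table(table, n, GFP_KERNEL)) {
            kfree(table);
            return ERR_PTR(-ENOMEM);
        }
        for_each_sg(table->sgl, sg, table->nents, i) {
            sg_dma_address(sg) = buffer_base + i * chunk_size;
            sg->length = chunk_size;
        }
        return table;
    }
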
#endif /* _ION_PRIV_H */
diff --git a/drivers/gpu/ion/ion_system_heap.c b/drivers/gpu/ion/ion_system_heap.c
index 980174e..ceb30a4 100644
--- a/drivers/gpu/ion/ion_system_heap.c
+++ b/drivers/gpu/ion/ion_system_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_system_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -15,7 +15,10 @@
*
*/
+#include <asm/page.h>
+#include <linux/dma-mapping.h>
#include <linux/err.h>
+#include <linux/highmem.h>
#include <linux/ion.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
@@ -28,12 +31,44 @@
#include <mach/memory.h>
#include <asm/cacheflush.h>
#include <linux/msm_ion.h>
+#include <linux/dma-mapping.h>
static atomic_t system_heap_allocated;
static atomic_t system_contig_heap_allocated;
static unsigned int system_heap_has_outer_cache;
static unsigned int system_heap_contig_has_outer_cache;
+struct page_info {
+ struct page *page;
+ unsigned long order;
+ struct list_head list;
+};
+
+static struct page_info *alloc_largest_available(unsigned long size,
+ bool split_pages)
+{
+ static unsigned int orders[] = {8, 4, 0};
+ struct page *page;
+ struct page_info *info;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(orders); i++) {
+ if (size < (1 << orders[i]) * PAGE_SIZE)
+ continue;
+ page = alloc_pages(GFP_HIGHUSER | __GFP_ZERO |
+ __GFP_NOWARN | __GFP_NORETRY, orders[i]);
+ if (!page)
+ continue;
+ if (split_pages)
+ split_page(page, orders[i]);
+ info = kmap(page);
+ info->page = page;
+ info->order = orders[i];
+ return info;
+ }
+ return NULL;
+}
+
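
alloc_largest_available stores its bookkeeping inside the block it just allocated: kmap() maps the first page and the page_info (page pointer, order, list node) is written straight into it, so no separate kmalloc is needed and the metadata disappears with the page. The cost is an ordering rule: consumers must read info->order and unlink the node before kunmap(), and kunmap() before the page is freed. The idiom in isolation:

    /* reuse the allocation itself to hold the tracking struct */
    static struct page_info *track_in_page(struct page *page,
                                           unsigned long order)
    {
        struct page_info *info = kmap(page);

        info->page = page;
        info->order = order;
        return info;
    }
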
static int ion_system_heap_allocate(struct ion_heap *heap,
struct ion_buffer *buffer,
unsigned long size, unsigned long align,
@@ -41,31 +76,73 @@
{
struct sg_table *table;
struct scatterlist *sg;
- int i, j;
- int npages = PAGE_ALIGN(size) / PAGE_SIZE;
+ int ret;
+ struct list_head pages;
+ struct page_info *info, *tmp_info;
+ int i = 0;
+ long size_remaining = PAGE_ALIGN(size);
+ bool split_pages = ion_buffer_fault_user_mappings(buffer);
+
+
+ INIT_LIST_HEAD(&pages);
+ while (size_remaining > 0) {
+ info = alloc_largest_available(size_remaining, split_pages);
+ if (!info)
+ goto err;
+ list_add_tail(&info->list, &pages);
+ size_remaining -= (1 << info->order) * PAGE_SIZE;
+ i++;
+ }
table = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
if (!table)
- return -ENOMEM;
- i = sg_alloc_table(table, npages, GFP_KERNEL);
- if (i)
- goto err0;
- for_each_sg(table->sgl, sg, table->nents, i) {
- struct page *page;
- page = alloc_page(GFP_KERNEL|__GFP_ZERO);
- if (!page)
- goto err1;
- sg_set_page(sg, page, PAGE_SIZE, 0);
+ goto err;
+
+ if (split_pages)
+ ret = sg_alloc_table(table, PAGE_ALIGN(size) / PAGE_SIZE,
+ GFP_KERNEL);
+ else
+ ret = sg_alloc_table(table, i, GFP_KERNEL);
+
+ if (ret)
+ goto err1;
+
+ sg = table->sgl;
+ list_for_each_entry_safe(info, tmp_info, &pages, list) {
+ struct page *page = info->page;
+
+ if (split_pages) {
+ for (i = 0; i < (1 << info->order); i++) {
+ sg_set_page(sg, page + i, PAGE_SIZE, 0);
+ sg = sg_next(sg);
+ }
+ } else {
+ sg_set_page(sg, page, (1 << info->order) * PAGE_SIZE,
+ 0);
+ sg = sg_next(sg);
+ }
+ list_del(&info->list);
+ kunmap(page);
}
+
+ dma_sync_sg_for_device(NULL, table->sgl, table->nents,
+ DMA_BIDIRECTIONAL);
+
buffer->priv_virt = table;
atomic_add(size, &system_heap_allocated);
return 0;
err1:
- for_each_sg(table->sgl, sg, i, j)
- __free_page(sg_page(sg));
- sg_free_table(table);
-err0:
kfree(table);
+err:
+ list_for_each_entry_safe(info, tmp_info, &pages, list) {
+ struct page *page = info->page;
+ unsigned long order = info->order;
+
+ /* the page_info lives inside the page: unmap before freeing */
+ list_del(&info->list);
+ kunmap(page);
+ if (split_pages)
+ for (i = 0; i < (1 << order); i++)
+ __free_page(page + i);
+ else
+ __free_pages(page, order);
+ }
return -ENOMEM;
}
@@ -76,7 +153,7 @@
struct sg_table *table = buffer->priv_virt;
for_each_sg(table->sgl, sg, table->nents, i)
- __free_page(sg_page(sg));
+ __free_pages(sg_page(sg), get_order(sg_dma_len(sg)));
if (buffer->sg_table)
sg_free_table(buffer->sg_table);
kfree(buffer->sg_table);
@@ -98,25 +175,33 @@
void *ion_system_heap_map_kernel(struct ion_heap *heap,
struct ion_buffer *buffer)
{
- if (!ION_IS_CACHED(buffer->flags)) {
- pr_err("%s: cannot map system heap uncached\n", __func__);
- return ERR_PTR(-EINVAL);
- } else {
- struct scatterlist *sg;
- int i;
- void *vaddr;
- struct sg_table *table = buffer->priv_virt;
- struct page **pages = kmalloc(
- sizeof(struct page *) * table->nents,
- GFP_KERNEL);
+ struct scatterlist *sg;
+ int i, j;
+ void *vaddr;
+ pgprot_t pgprot;
+ struct sg_table *table = buffer->priv_virt;
+ int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+ struct page **pages = kzalloc(sizeof(struct page *) * npages,
+ GFP_KERNEL);
+ struct page **tmp = pages;
+
+ if (!pages)
+ return ERR_PTR(-ENOMEM);
- for_each_sg(table->sgl, sg, table->nents, i)
- pages[i] = sg_page(sg);
- vaddr = vmap(pages, table->nents, VM_MAP, PAGE_KERNEL);
- kfree(pages);
+ if (buffer->flags & ION_FLAG_CACHED)
+ pgprot = PAGE_KERNEL;
+ else
+ pgprot = pgprot_writecombine(PAGE_KERNEL);
- return vaddr;
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ int npages_this_entry = PAGE_ALIGN(sg_dma_len(sg)) / PAGE_SIZE;
+ struct page *page = sg_page(sg);
+ BUG_ON(i >= npages);
+ for (j = 0; j < npages_this_entry; j++) {
+ *(tmp++) = page++;
+ }
}
+ vaddr = vmap(pages, npages, VM_MAP, pgprot);
+ kfree(pages);
+
+ return vaddr;
}
void ion_system_heap_unmap_kernel(struct ion_heap *heap,
@@ -154,26 +239,27 @@
int ion_system_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
struct vm_area_struct *vma)
{
+ struct sg_table *table = buffer->priv_virt;
+ unsigned long addr = vma->vm_start;
+ unsigned long offset = vma->vm_pgoff;
+ struct scatterlist *sg;
+ int i;
+
if (!ION_IS_CACHED(buffer->flags)) {
pr_err("%s: cannot map system heap uncached\n", __func__);
return -EINVAL;
- } else {
- struct sg_table *table = buffer->priv_virt;
- unsigned long addr = vma->vm_start;
- unsigned long offset = vma->vm_pgoff;
- struct scatterlist *sg;
- int i;
-
- for_each_sg(table->sgl, sg, table->nents, i) {
- if (offset) {
- offset--;
- continue;
- }
- vm_insert_page(vma, addr, sg_page(sg));
- addr += PAGE_SIZE;
- }
- return 0;
}
+
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ if (offset) {
+ offset--;
+ continue;
+ }
+ remap_pfn_range(vma, addr, page_to_pfn(sg_page(sg)),
+ sg_dma_len(sg), vma->vm_page_prot);
+ addr += sg_dma_len(sg);
+ }
+ return 0;
}
int ion_system_heap_cache_ops(struct ion_heap *heap, struct ion_buffer *buffer,
@@ -184,15 +270,30 @@
switch (cmd) {
case ION_IOC_CLEAN_CACHES:
- dmac_clean_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ else
+ dmac_clean_range(vaddr, vaddr + length);
outer_cache_op = outer_clean_range;
break;
case ION_IOC_INV_CACHES:
- dmac_inv_range(vaddr, vaddr + length);
+ if (!vaddr)
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ else
+ dmac_inv_range(vaddr, vaddr + length);
outer_cache_op = outer_inv_range;
break;
case ION_IOC_CLEAN_INV_CACHES:
- dmac_flush_range(vaddr, vaddr + length);
+ if (!vaddr) {
+ dma_sync_sg_for_device(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_TO_DEVICE);
+ dma_sync_sg_for_cpu(NULL, buffer->sg_table->sgl,
+ buffer->sg_table->nents, DMA_FROM_DEVICE);
+ } else {
+ dmac_flush_range(vaddr, vaddr + length);
+ }
outer_cache_op = outer_flush_range;
break;
default:
@@ -255,6 +356,14 @@
data->mapped_size = iova_length;
extra = iova_length - buffer->size;
+ /* Use the biggest alignment to allow bigger IOMMU mappings.
+ * Use the first entry since the first entry will always be the
+ * biggest entry. To take advantage of bigger mapping sizes both the
+ * VA and PA addresses have to be aligned to the biggest size.
+ */
+ if (table->sgl->length > align)
+ align = table->sgl->length;
+
ret = msm_allocate_iova_address(domain_num, partition_num,
data->mapped_size, align,
&data->iova_addr);
@@ -280,8 +389,9 @@
extra_iova_addr = data->iova_addr + buffer->size;
if (extra) {
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
- prot);
+ unsigned long phys_addr = sg_phys(table->sgl);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
@@ -357,7 +467,7 @@
}
struct sg_table *ion_system_contig_heap_map_dma(struct ion_heap *heap,
- struct ion_buffer *buffer)
+ struct ion_buffer *buffer)
{
struct sg_table *table;
int ret;
@@ -375,6 +485,13 @@
return table;
}
+void ion_system_contig_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ sg_free_table(buffer->sg_table);
+ kfree(buffer->sg_table);
+}
+
int ion_system_contig_heap_map_user(struct ion_heap *heap,
struct ion_buffer *buffer,
struct vm_area_struct *vma)
@@ -483,7 +600,7 @@
}
page = virt_to_page(buffer->vaddr);
- sglist = kmalloc(sizeof(*sglist), GFP_KERNEL);
+ sglist = vmalloc(sizeof(*sglist));
if (!sglist)
goto out1;
@@ -500,32 +617,45 @@
if (extra) {
unsigned long extra_iova_addr = data->iova_addr + buffer->size;
- ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
- prot);
+ unsigned long phys_addr = sg_phys(sglist);
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, phys_addr,
+ extra, SZ_4K, prot);
if (ret)
goto out2;
}
- kfree(sglist);
+ vfree(sglist);
return ret;
out2:
iommu_unmap_range(domain, data->iova_addr, buffer->size);
out1:
- kfree(sglist);
+ vfree(sglist);
msm_free_iova_address(data->iova_addr, domain_num, partition_num,
data->mapped_size);
out:
return ret;
}
+void *ion_system_contig_heap_map_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return buffer->priv_virt;
+}
+
+void ion_system_contig_heap_unmap_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
static struct ion_heap_ops kmalloc_ops = {
.allocate = ion_system_contig_heap_allocate,
.free = ion_system_contig_heap_free,
.phys = ion_system_contig_heap_phys,
.map_dma = ion_system_contig_heap_map_dma,
- .unmap_dma = ion_system_heap_unmap_dma,
- .map_kernel = ion_system_heap_map_kernel,
- .unmap_kernel = ion_system_heap_unmap_kernel,
+ .unmap_dma = ion_system_contig_heap_unmap_dma,
+ .map_kernel = ion_system_contig_heap_map_kernel,
+ .unmap_kernel = ion_system_contig_heap_unmap_kernel,
.map_user = ion_system_contig_heap_map_user,
.cache_op = ion_system_contig_heap_cache_ops,
.print_debug = ion_system_contig_print_debug,
diff --git a/drivers/gpu/ion/msm/ion_cp_common.c b/drivers/gpu/ion/msm/ion_cp_common.c
index 41e0a04..7d54cfa 100644
--- a/drivers/gpu/ion/msm/ion_cp_common.c
+++ b/drivers/gpu/ion/msm/ion_cp_common.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2011 Google, Inc
- * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -12,12 +12,15 @@
* GNU General Public License for more details.
*/
+#include <linux/memory_alloc.h>
#include <linux/types.h>
#include <mach/scm.h>
+#include "../ion_priv.h"
#include "ion_cp_common.h"
#define MEM_PROTECT_LOCK_ID 0x05
+#define MEM_PROTECT_LOCK_ID2 0x0A
struct cp2_mem_chunks {
unsigned int *chunk_list;
@@ -25,28 +28,275 @@
unsigned int chunk_size;
} __attribute__ ((__packed__));
-struct cp2_lock_req {
+struct cp2_lock2_req {
struct cp2_mem_chunks chunks;
unsigned int mem_usage;
unsigned int lock;
+ unsigned int flags;
} __attribute__ ((__packed__));
+/* SCM related code for locking down memory for content protection */
+
+#define SCM_CP_LOCK_CMD_ID 0x1
+#define SCM_CP_PROTECT 0x1
+#define SCM_CP_UNPROTECT 0x0
+
+struct cp_lock_msg {
+ unsigned int start;
+ unsigned int end;
+ unsigned int permission_type;
+ unsigned char lock;
+} __attribute__ ((__packed__));
+
+static int ion_cp_protect_mem_v1(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type)
+{
+ struct cp_lock_msg cmd;
+ cmd.start = phy_base;
+ cmd.end = phy_base + size;
+ cmd.permission_type = permission_type;
+ cmd.lock = SCM_CP_PROTECT;
+
+ return scm_call(SCM_SVC_MP, SCM_CP_LOCK_CMD_ID,
+ &cmd, sizeof(cmd), NULL, 0);
+}
+
+static int ion_cp_unprotect_mem_v1(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type)
+{
+ struct cp_lock_msg cmd;
+ cmd.start = phy_base;
+ cmd.end = phy_base + size;
+ cmd.permission_type = permission_type;
+ cmd.lock = SCM_CP_UNPROTECT;
+
+ return scm_call(SCM_SVC_MP, SCM_CP_LOCK_CMD_ID,
+ &cmd, sizeof(cmd), NULL, 0);
+}
+
+#define V2_CHUNK_SIZE SZ_1M
+
+static int ion_cp_change_mem_v2(unsigned int phy_base, unsigned int size,
+ void *data, int lock)
+{
+ enum cp_mem_usage usage = (enum cp_mem_usage) data;
+ unsigned long *chunk_list;
+ int nchunks;
+ int ret;
+ int i;
+
+ if (usage < 0 || usage >= MAX_USAGE)
+ return -EINVAL;
+
+ if (!IS_ALIGNED(size, V2_CHUNK_SIZE)) {
+ pr_err("%s: heap size is not aligned to %x\n",
+ __func__, V2_CHUNK_SIZE);
+ return -EINVAL;
+ }
+
+ nchunks = size / V2_CHUNK_SIZE;
+
+ chunk_list = allocate_contiguous_ebi(sizeof(unsigned long)*nchunks,
+ SZ_4K, 0);
+ if (!chunk_list)
+ return -ENOMEM;
+
+ for (i = 0; i < nchunks; i++)
+ chunk_list[i] = phy_base + i * V2_CHUNK_SIZE;
+
+ ret = ion_cp_change_chunks_state(memory_pool_node_paddr(chunk_list),
+ nchunks, V2_CHUNK_SIZE, usage, lock);
+
+ free_contiguous_memory(chunk_list);
+ return ret;
+}
+
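
ion_cp_change_mem_v2 describes the region to the secure world as a flat array of 1 MB chunk base addresses, allocated from EBI so TZ can read it by physical address. For example, a 4 MB heap at 0x88000000 reduces to:

    /* chunk_list = { 0x88000000, 0x88100000, 0x88200000, 0x88300000 } */
    nchunks = SZ_4M / V2_CHUNK_SIZE;    /* 4 chunks */
    for (i = 0; i < nchunks; i++)
        chunk_list[i] = 0x88000000UL + i * V2_CHUNK_SIZE;
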
+int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data)
+{
+ switch (version) {
+ case ION_CP_V1:
+ return ion_cp_protect_mem_v1(phy_base, size, permission_type);
+ case ION_CP_V2:
+ return ion_cp_change_mem_v2(phy_base, size, data,
+ SCM_CP_PROTECT);
+ default:
+ return -EINVAL;
+ }
+}
+
+int ion_cp_unprotect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data)
+{
+ switch (version) {
+ case ION_CP_V1:
+ return ion_cp_unprotect_mem_v1(phy_base, size, permission_type);
+ case ION_CP_V2:
+ return ion_cp_change_mem_v2(phy_base, size, data,
+ SCM_CP_UNPROTECT);
+ default:
+ return -EINVAL;
+ }
+}
+
int ion_cp_change_chunks_state(unsigned long chunks, unsigned int nchunks,
unsigned int chunk_size,
enum cp_mem_usage usage,
int lock)
{
- struct cp2_lock_req request;
+ struct cp2_lock2_req request;
+ u32 resp;
request.mem_usage = usage;
request.lock = lock;
+ request.flags = 0;
request.chunks.chunk_list = (unsigned int *)chunks;
request.chunks.chunk_list_size = nchunks;
request.chunks.chunk_size = chunk_size;
- return scm_call(SCM_SVC_CP, MEM_PROTECT_LOCK_ID,
- &request, sizeof(request), NULL, 0);
+ return scm_call(SCM_SVC_MP, MEM_PROTECT_LOCK_ID2,
+ &request, sizeof(request), &resp, sizeof(resp));
}
+/* Must be protected by ion_cp_buffer lock */
+static int __ion_cp_protect_buffer(struct ion_buffer *buffer, int version,
+ void *data, int flags)
+{
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+ int ret_value = 0;
+
+ if (atomic_inc_return(&buf->secure_cnt) == 1) {
+ ret_value = ion_cp_protect_mem(buf->buffer,
+ buffer->size, 0,
+ version, data);
+
+ if (ret_value) {
+ pr_err("Failed to secure buffer %p, error %d\n",
+ buffer, ret_value);
+ atomic_dec(&buf->secure_cnt);
+ } else {
+ pr_debug("Protected buffer %p from %pa (size %x)\n",
+ buffer, &buf->buffer,
+ buffer->size);
+ buf->want_delayed_unsecure |=
+ flags & ION_UNSECURE_DELAYED ? 1 : 0;
+ buf->data = data;
+ buf->version = version;
+ }
+ }
+ pr_debug("buffer %p protect count %d\n", buffer,
+ atomic_read(&buf->secure_cnt));
+ BUG_ON(atomic_read(&buf->secure_cnt) < 0);
+ return ret_value;
+}
+
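
Per-buffer protection is reference counted: only the 0 -> 1 transition pays for the SCM call; later securers (which ion_cp_secure_buffer has already checked for matching version/data) just bump the count. The rollback on failure is why the counter still needs buf->lock around it. Stripped of the CP details, this is the usual do-the-work-on-first-reference idiom (hypothetical helper):

    /* caller holds the buffer lock */
    static int secure_get(atomic_t *cnt, int (*do_secure)(void))
    {
        int ret = 0;

        if (atomic_inc_return(cnt) == 1) {
            ret = do_secure();          /* only the first taker pays */
            if (ret)
                atomic_dec(cnt);        /* roll back on failure */
        }
        return ret;
    }
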
+/* Must be protected by ion_cp_buffer lock */
+static int __ion_cp_unprotect_buffer(struct ion_buffer *buffer, int version,
+ void *data, int force_unsecure)
+{
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+ int ret_value = 0;
+
+ if (force_unsecure) {
+ if (!buf->is_secure || atomic_read(&buf->secure_cnt) == 0)
+ return 0;
+
+ if (atomic_read(&buf->secure_cnt) != 1) {
+ WARN(1, "Forcing unsecure of buffer with outstanding secure count %d!\n",
+ atomic_read(&buf->secure_cnt));
+ atomic_set(&buf->secure_cnt, 1);
+ }
+ }
+
+ if (atomic_dec_and_test(&buf->secure_cnt)) {
+ ret_value = ion_cp_unprotect_mem(
+ buf->buffer, buffer->size,
+ 0, version, data);
+
+ if (ret_value) {
+ pr_err("Failed to unsecure buffer %p, error %d\n",
+ buffer, ret_value);
+ /*
+ * If the force unsecure is happening, the buffer
+ * is being destroyed. We failed to unsecure the
+ * buffer even though the memory is given back.
+ * Just die now rather than discovering later what
+ * happens when trying to use the secured memory as
+ * unsecured...
+ */
+ BUG_ON(force_unsecure);
+ /* Bump the count back up one to try again later */
+ atomic_inc(&buf->secure_cnt);
+ } else {
+ buf->version = -1;
+ buf->data = NULL;
+ }
+ }
+ pr_debug("buffer %p unprotect count %d\n", buffer,
+ atomic_read(&buf->secure_cnt));
+ BUG_ON(atomic_read(&buf->secure_cnt) < 0);
+ return ret_value;
+}
+
+int ion_cp_secure_buffer(struct ion_buffer *buffer, int version, void *data,
+ int flags)
+{
+ int ret_value;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+
+ mutex_lock(&buf->lock);
+ if (!buf->is_secure) {
+ pr_err("%s: buffer %p was not allocated as secure\n",
+ __func__, buffer);
+ ret_value = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (ION_IS_CACHED(buffer->flags)) {
+ pr_err("%s: buffer %p was allocated as cached\n",
+ __func__, buffer);
+ ret_value = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (atomic_read(&buf->map_cnt)) {
+ pr_err("%s: cannot secure buffer %p with outstanding mappings. Total count: %d",
+ __func__, buffer, atomic_read(&buf->map_cnt));
+ ret_value = -EINVAL;
+ goto out_unlock;
+ }
+
+ if (atomic_read(&buf->secure_cnt)) {
+ if (buf->version != version || buf->data != data) {
+ pr_err("%s: Trying to re-secure buffer with different values",
+ __func__);
+ pr_err("Last secured version: %d Currrent %d\n",
+ buf->version, version);
+ pr_err("Last secured data: %p current %p\n",
+ buf->data, data);
+ ret_value = -EINVAL;
+ goto out_unlock;
+ }
+ }
+ ret_value = __ion_cp_protect_buffer(buffer, version, data, flags);
+
+out_unlock:
+ mutex_unlock(&buf->lock);
+ return ret_value;
+}
+
+int ion_cp_unsecure_buffer(struct ion_buffer *buffer, int force_unsecure)
+{
+ int ret_value = 0;
+ struct ion_cp_buffer *buf = buffer->priv_virt;
+
+ mutex_lock(&buf->lock);
+ ret_value = __ion_cp_unprotect_buffer(buffer, buf->version, buf->data,
+ force_unsecure);
+ mutex_unlock(&buf->lock);
+ return ret_value;
+}
diff --git a/drivers/gpu/ion/msm/ion_cp_common.h b/drivers/gpu/ion/msm/ion_cp_common.h
index eec66e6..8ae19be 100644
--- a/drivers/gpu/ion/msm/ion_cp_common.h
+++ b/drivers/gpu/ion/msm/ion_cp_common.h
@@ -20,6 +20,26 @@
#define ION_CP_V1 1
#define ION_CP_V2 2
+struct ion_cp_buffer {
+ phys_addr_t buffer;
+ atomic_t secure_cnt;
+ int is_secure;
+ int want_delayed_unsecure;
+ /*
+ * Currently all user/kernel mapping is protected by the heap lock.
+ * This is sufficient to protect the map count as well. The lock
+ * should be used to protect map_cnt if the whole heap lock is
+ * ever removed.
+ */
+ atomic_t map_cnt;
+ /*
+ * protects secure_cnt (and version/data) across secure and
+ * unsecure transitions.
+ */
+ struct mutex lock;
+ int version;
+ void *data;
+};
+
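
Nothing in this header initializes the struct; the allocating heap is expected to. A minimal setup — a sketch, since the real allocate path lives in ion_cp_heap.c outside this hunk; paddr and wants_secure are stand-ins — looks like:

    struct ion_cp_buffer *buf = kzalloc(sizeof(*buf), GFP_KERNEL);

    if (!buf)
        return -ENOMEM;
    buf->buffer = paddr;            /* physical base of the allocation */
    buf->is_secure = wants_secure;  /* secure-capable heaps set this */
    buf->version = -1;              /* matches the unprotect reset value */
    atomic_set(&buf->secure_cnt, 0);
    atomic_set(&buf->map_cnt, 0);
    mutex_init(&buf->lock);
    buffer->priv_virt = buf;
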
#if defined(CONFIG_ION_MSM)
/*
* ion_cp2_protect_mem - secures memory via trustzone
@@ -37,6 +57,18 @@
unsigned int chunk_size, enum cp_mem_usage usage,
int lock);
+int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data);
+
+int ion_cp_unprotect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data);
+
+int ion_cp_secure_buffer(struct ion_buffer *buffer, int version, void *data,
+ int flags);
+
+int ion_cp_unsecure_buffer(struct ion_buffer *buffer, int force_unsecure);
#else
static inline int ion_cp_change_chunks_state(unsigned long chunks,
unsigned int nchunks, unsigned int chunk_size,
@@ -44,6 +76,32 @@
{
return -ENODEV;
}
+
+static inline int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data)
+{
+ return -ENODEV;
+}
+
+static inline int ion_cp_unprotect_mem(unsigned int phy_base, unsigned int size,
+ unsigned int permission_type, int version,
+ void *data)
+{
+ return -ENODEV;
+}
+
+static inline int ion_cp_secure_buffer(struct ion_buffer *buffer, int version,
+ void *data, int flags)
+{
+ return -ENODEV;
+}
+
+static inline int ion_cp_unsecure_buffer(struct ion_buffer *buffer,
+ int force_unsecure)
+{
+ return -ENODEV;
+}
#endif
#endif
diff --git a/drivers/gpu/ion/msm/msm_ion.c b/drivers/gpu/ion/msm/msm_ion.c
index ab5d09b..4b55875 100644
--- a/drivers/gpu/ion/msm/msm_ion.c
+++ b/drivers/gpu/ion/msm/msm_ion.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,11 +19,13 @@
#include <linux/memory_alloc.h>
#include <linux/fmem.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
#include <linux/mm.h>
#include <linux/mm_types.h>
#include <linux/sched.h>
#include <linux/rwsem.h>
#include <linux/uaccess.h>
+#include <linux/memblock.h>
#include <mach/ion.h>
#include <mach/msm_memtypes.h>
#include "../ion_priv.h"
@@ -44,6 +46,7 @@
};
+#ifdef CONFIG_OF
static struct ion_heap_desc ion_heap_meta[] = {
{
.id = ION_SYSTEM_HEAP_ID,
@@ -51,8 +54,13 @@
.name = ION_VMALLOC_HEAP_NAME,
},
{
+ .id = ION_SYSTEM_CONTIG_HEAP_ID,
+ .type = ION_HEAP_TYPE_SYSTEM_CONTIG,
+ .name = ION_KMALLOC_HEAP_NAME,
+ },
+ {
.id = ION_CP_MM_HEAP_ID,
- .type = ION_HEAP_TYPE_CP,
+ .type = ION_HEAP_TYPE_SECURE_DMA,
.name = ION_MM_HEAP_NAME,
.permission_type = IPT_TYPE_MM_CARVEOUT,
},
@@ -88,6 +96,16 @@
.name = ION_AUDIO_HEAP_NAME,
},
{
+ .id = ION_PIL1_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_PIL1_HEAP_NAME,
+ },
+ {
+ .id = ION_PIL2_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_PIL2_HEAP_NAME,
+ },
+ {
.id = ION_CP_WB_HEAP_ID,
.type = ION_HEAP_TYPE_CP,
.name = ION_WB_HEAP_NAME,
@@ -97,7 +115,13 @@
.type = ION_HEAP_TYPE_CARVEOUT,
.name = ION_CAMERA_HEAP_NAME,
},
+ {
+ .id = ION_ADSP_HEAP_ID,
+ .type = ION_HEAP_TYPE_DMA,
+ .name = ION_ADSP_HEAP_NAME,
+ }
};
+#endif
struct ion_client *msm_ion_client_create(unsigned int heap_mask,
const char *name)
@@ -130,6 +154,22 @@
}
EXPORT_SYMBOL(msm_ion_unsecure_heap_2_0);
+int msm_ion_secure_buffer(struct ion_client *client, struct ion_handle *handle,
+ enum cp_mem_usage usage,
+ int flags)
+{
+ return ion_secure_handle(client, handle, ION_CP_V2,
+ (void *)usage, flags);
+}
+EXPORT_SYMBOL(msm_ion_secure_buffer);
+
+int msm_ion_unsecure_buffer(struct ion_client *client,
+ struct ion_handle *handle)
+{
+ return ion_unsecure_handle(client, handle);
+}
+EXPORT_SYMBOL(msm_ion_unsecure_buffer);
+
int msm_ion_do_cache_op(struct ion_client *client, struct ion_handle *handle,
void *vaddr, unsigned long len, unsigned int cmd)
{
@@ -137,7 +177,7 @@
}
EXPORT_SYMBOL(msm_ion_do_cache_op);
-static unsigned long msm_ion_get_base(unsigned long size, int memory_type,
+static ion_phys_addr_t msm_ion_get_base(unsigned long size, int memory_type,
unsigned int align)
{
switch (memory_type) {
@@ -305,10 +345,10 @@
static int is_heap_overlapping(const struct ion_platform_heap *heap1,
const struct ion_platform_heap *heap2)
{
- unsigned long heap1_base = heap1->base;
- unsigned long heap2_base = heap2->base;
- unsigned long heap1_end = heap1->base + heap1->size - 1;
- unsigned long heap2_end = heap2->base + heap2->size - 1;
+ ion_phys_addr_t heap1_base = heap1->base;
+ ion_phys_addr_t heap2_base = heap2->base;
+ ion_phys_addr_t heap1_end = heap1->base + heap1->size - 1;
+ ion_phys_addr_t heap2_end = heap2->base + heap2->size - 1;
if (heap1_base == heap2_base)
return 1;
@@ -341,6 +381,7 @@
}
}
+#ifdef CONFIG_OF
static int msm_init_extra_data(struct ion_platform_heap *heap,
const struct ion_heap_desc *heap_desc)
{
@@ -397,6 +438,7 @@
unsigned int i;
for (i = 0; i < pdata->nr; ++i)
kfree(pdata->heaps[i].extra_data);
+ kfree(pdata->heaps);
kfree(pdata);
}
@@ -442,6 +484,7 @@
{
unsigned int val;
int ret = 0;
+ u32 out_values[2];
const char *memory_name_prop;
ret = of_property_read_u32(node, "qcom,memory-reservation-size", &val);
@@ -465,12 +508,29 @@
ret = -EINVAL;
}
} else {
- ret = 0;
+ ret = of_property_read_u32_array(node, "qcom,memory-fixed",
+ out_values, 2);
+ if (!ret)
+ heap->size = out_values[1];
+ else
+ ret = 0;
}
out:
return ret;
}
+static void msm_ion_get_heap_base(struct device_node *node,
+ struct ion_platform_heap *heap)
+{
+ u32 out_values[2];
+ int ret = 0;
+
+ ret = of_property_read_u32_array(node, "qcom,memory-fixed",
+ out_values, 2);
+ if (!ret)
+ heap->base = out_values[0];
+ return;
+}
static void msm_ion_get_heap_adjacent(struct device_node *node,
struct ion_platform_heap *heap)
@@ -504,11 +564,13 @@
}
}
-static struct ion_platform_data *msm_ion_parse_dt(
- const struct device_node *dt_node)
+static struct ion_platform_data *msm_ion_parse_dt(struct platform_device *pdev)
{
struct ion_platform_data *pdata = 0;
+ struct ion_platform_heap *heaps = NULL;
struct device_node *node;
+ struct platform_device *new_dev = NULL;
+ const struct device_node *dt_node = pdev->dev.of_node;
uint32_t val = 0;
int ret = 0;
uint32_t num_heaps = 0;
@@ -520,14 +582,27 @@
if (!num_heaps)
return ERR_PTR(-EINVAL);
- pdata = kzalloc(sizeof(struct ion_platform_data) +
- num_heaps*sizeof(struct ion_platform_heap), GFP_KERNEL);
+ pdata = kzalloc(sizeof(struct ion_platform_data), GFP_KERNEL);
if (!pdata)
return ERR_PTR(-ENOMEM);
+ heaps = kzalloc(sizeof(struct ion_platform_heap)*num_heaps, GFP_KERNEL);
+ if (!heaps) {
+ kfree(pdata);
+ return ERR_PTR(-ENOMEM);
+ }
+
+ pdata->heaps = heaps;
pdata->nr = num_heaps;
for_each_child_of_node(dt_node, node) {
+ new_dev = of_platform_device_create(node, NULL, &pdev->dev);
+ if (!new_dev) {
+ pr_err("Failed to create device %s\n", node->name);
+ goto free_heaps;
+ }
+
+ pdata->heaps[idx].priv = &new_dev->dev;
/**
* TODO: Replace this with of_get_address() when this patch
* gets merged: http://
@@ -544,6 +619,7 @@
if (ret)
goto free_heaps;
+ msm_ion_get_heap_base(node, &pdata->heaps[idx]);
msm_ion_get_heap_align(node, &pdata->heaps[idx]);
ret = msm_ion_get_heap_size(node, &pdata->heaps[idx]);
@@ -560,6 +636,17 @@
free_pdata(pdata);
return ERR_PTR(ret);
}
+#else
+static struct ion_platform_data *msm_ion_parse_dt(struct platform_device *pdev)
+{
+ return NULL;
+}
+
+static void free_pdata(const struct ion_platform_data *pdata)
+{
+
+}
+#endif
static int check_vaddr_bounds(unsigned long start, unsigned long end)
{
@@ -570,22 +657,36 @@
if (end < start)
goto out;
- down_read(&mm->mmap_sem);
vma = find_vma(mm, start);
if (vma && vma->vm_start < end) {
if (start < vma->vm_start)
- goto out_up;
+ goto out;
if (end > vma->vm_end)
- goto out_up;
+ goto out;
ret = 0;
}
-out_up:
- up_read(&mm->mmap_sem);
out:
return ret;
}
+int ion_heap_allow_secure_allocation(enum ion_heap_type type)
+{
+ return type == ((enum ion_heap_type) ION_HEAP_TYPE_CP) ||
+ type == ((enum ion_heap_type) ION_HEAP_TYPE_SECURE_DMA);
+}
+
+int ion_heap_allow_handle_secure(enum ion_heap_type type)
+{
+ return type == ((enum ion_heap_type) ION_HEAP_TYPE_CP) ||
+ type == ((enum ion_heap_type) ION_HEAP_TYPE_SECURE_DMA);
+}
+
+int ion_heap_allow_heap_secure(enum ion_heap_type type)
+{
+ return type == ((enum ion_heap_type) ION_HEAP_TYPE_CP);
+}
+
static long msm_ion_custom_ioctl(struct ion_client *client,
unsigned int cmd,
unsigned long arg)
@@ -599,20 +700,12 @@
unsigned long start, end;
struct ion_handle *handle = NULL;
int ret;
+ struct mm_struct *mm = current->active_mm;
if (copy_from_user(&data, (void __user *)arg,
sizeof(struct ion_flush_data)))
return -EFAULT;
- start = (unsigned long) data.vaddr;
- end = (unsigned long) data.vaddr + data.length;
-
- if (check_vaddr_bounds(start, end)) {
- pr_err("%s: virtual address %p is out of bounds\n",
- __func__, data.vaddr);
- return -EINVAL;
- }
-
if (!data.handle) {
handle = ion_import_dma_buf(client, data.fd);
if (IS_ERR(handle)) {
@@ -622,11 +715,27 @@
}
}
+ down_read(&mm->mmap_sem);
+
+ start = (unsigned long) data.vaddr;
+ end = (unsigned long) data.vaddr + data.length;
+
+ if (start && check_vaddr_bounds(start, end)) {
+ up_read(&mm->mmap_sem);
+ pr_err("%s: virtual address %p is out of bounds\n",
+ __func__, data.vaddr);
+ if (!data.handle)
+ ion_free(client, handle);
+ return -EINVAL;
+ }
+
ret = ion_do_cache_op(client,
data.handle ? data.handle : handle,
data.vaddr, data.offset, data.length,
cmd);
+ up_read(&mm->mmap_sem);
+
if (!data.handle)
ion_free(client, handle);
@@ -635,28 +744,74 @@
break;
}
- case ION_IOC_GET_FLAGS:
- {
- struct ion_flag_data data;
- int ret;
- if (copy_from_user(&data, (void __user *)arg,
- sizeof(struct ion_flag_data)))
- return -EFAULT;
-
- ret = ion_handle_get_flags(client, data.handle, &data.flags);
- if (ret < 0)
- return ret;
- if (copy_to_user((void __user *)arg, &data,
- sizeof(struct ion_flag_data)))
- return -EFAULT;
- break;
- }
default:
return -ENOTTY;
}
return 0;
}
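
For reference, the flush path above is driven from userspace with an ion_flush_data naming either a handle or an importable fd; vaddr may be 0, in which case the heaps fall back to the sg-table based dma_sync_sg_* maintenance shown earlier. A usage sketch (ion_fd, handle, mapped_base and len are assumed to exist):

    struct ion_flush_data data;

    memset(&data, 0, sizeof(data));
    data.handle = handle;       /* or data.fd = dmabuf_fd with handle 0 */
    data.vaddr = mapped_base;   /* 0 is allowed: heap syncs via sg table */
    data.offset = 0;
    data.length = len;

    if (ioctl(ion_fd, ION_IOC_CLEAN_INV_CACHES, &data) < 0)
        perror("ION_IOC_CLEAN_INV_CACHES");
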
+static struct ion_heap *msm_ion_heap_create(struct ion_platform_heap *heap_data)
+{
+ struct ion_heap *heap = NULL;
+
+ switch ((int)heap_data->type) {
+ case ION_HEAP_TYPE_IOMMU:
+ heap = ion_iommu_heap_create(heap_data);
+ break;
+ case ION_HEAP_TYPE_CP:
+ heap = ion_cp_heap_create(heap_data);
+ break;
+#ifdef CONFIG_CMA
+ case ION_HEAP_TYPE_DMA:
+ heap = ion_cma_heap_create(heap_data);
+ break;
+
+ case ION_HEAP_TYPE_SECURE_DMA:
+ heap = ion_secure_cma_heap_create(heap_data);
+ break;
+#endif
+ default:
+ heap = ion_heap_create(heap_data);
+ }
+
+ if (IS_ERR_OR_NULL(heap)) {
+ pr_err("%s: error creating heap %s type %d base %pa size %u\n",
+ __func__, heap_data->name, heap_data->type,
+ &heap_data->base, heap_data->size);
+ return ERR_PTR(-EINVAL);
+ }
+
+ heap->name = heap_data->name;
+ heap->id = heap_data->id;
+ heap->priv = heap_data->priv;
+ return heap;
+}
+
+static void msm_ion_heap_destroy(struct ion_heap *heap)
+{
+ if (!heap)
+ return;
+
+ switch ((int)heap->type) {
+ case ION_HEAP_TYPE_IOMMU:
+ ion_iommu_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_CP:
+ ion_cp_heap_destroy(heap);
+ break;
+#ifdef CONFIG_CMA
+ case ION_HEAP_TYPE_DMA:
+ ion_cma_heap_destroy(heap);
+ break;
+ case ION_HEAP_TYPE_SECURE_DMA:
+ ion_secure_cma_heap_destroy(heap);
+ break;
+#endif
+ default:
+ ion_heap_destroy(heap);
+ }
+}
+
static int msm_ion_probe(struct platform_device *pdev)
{
struct ion_platform_data *pdata;
@@ -664,7 +819,7 @@
int err = -1;
int i;
if (pdev->dev.of_node) {
- pdata = msm_ion_parse_dt(pdev->dev.of_node);
+ pdata = msm_ion_parse_dt(pdev);
if (IS_ERR(pdata)) {
err = PTR_ERR(pdata);
goto out;
@@ -698,15 +853,15 @@
msm_ion_allocate(heap_data);
heap_data->has_outer_cache = pdata->has_outer_cache;
- heaps[i] = ion_heap_create(heap_data);
+ heaps[i] = msm_ion_heap_create(heap_data);
if (IS_ERR_OR_NULL(heaps[i])) {
heaps[i] = 0;
continue;
} else {
if (heap_data->size)
- pr_info("ION heap %s created at %lx "
+ pr_info("ION heap %s created at %pa "
"with size %x\n", heap_data->name,
- heap_data->base,
+ &heap_data->base,
heap_data->size);
else
pr_info("ION heap %s created\n",
@@ -715,10 +870,10 @@
ion_device_add_heap(idev, heaps[i]);
}
+ check_for_heap_overlap(pdata->heaps, num_heaps);
if (pdata_needs_to_be_freed)
free_pdata(pdata);
- check_for_heap_overlap(pdata->heaps, num_heaps);
platform_set_drvdata(pdev, idev);
return 0;
@@ -736,7 +891,7 @@
int i;
for (i = 0; i < num_heaps; i++)
- ion_heap_destroy(heaps[i]);
+ msm_ion_heap_destroy(heaps[i]);
ion_device_destroy(idev);
kfree(heaps);
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index fec5363..3441afa 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -1,4 +1,4 @@
-ccflags-y := -Iinclude/drm -Idrivers/gpu/msm
+ccflags-y := -Iinclude/uapi/drm -Iinclude/drm -Idrivers/gpu/msm
msm_kgsl_core-y = \
kgsl.o \
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index be9f3ac..a2f0e60 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -66,15 +66,103 @@
#define A3XX_RBBM_INT_0_MASK 0x063
#define A3XX_RBBM_INT_0_STATUS 0x064
#define A3XX_RBBM_PERFCTR_CTL 0x80
+#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
+#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
+#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
+#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
+#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
+#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
+#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
+#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
+#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
+#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
+#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
+#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
+#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
+#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
+#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
+#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
+#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
+#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
+#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
+#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
+#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
+#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
+#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
+#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
+#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
+#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
+#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
+#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
+#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
+#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
+#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
+#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
+#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
+#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
+#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
+#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
+#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
+#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
+#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
+#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
+#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
+#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
+#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
+#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
+#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
+#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
+#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
+#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
+#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
+#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
+#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
+#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
+#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
+#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
+#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
+#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
+#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
+#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
+#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
+#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
+#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
+#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
+#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
+#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
+#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
+#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
+#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
+#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
+#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
+#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
+#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
+#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
+#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
+#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
+#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
+#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
+#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
+#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
+#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
+#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
+#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
+#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
+#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
+
#define A3XX_RBBM_RBBM_CTL 0x100
-#define A3XX_RBBM_RBBM_CTL 0x100
+#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
+#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
@@ -90,6 +178,7 @@
#define A3XX_CP_MERCIU_DATA2 0x1D3
#define A3XX_CP_MEQ_ADDR 0x1DA
#define A3XX_CP_MEQ_DATA 0x1DB
+#define A3XX_CP_PERFCOUNTER_SELECT 0x445
#define A3XX_CP_HW_FAULT 0x45C
#define A3XX_CP_AHB_FAULT 0x54D
#define A3XX_CP_PROTECT_CTRL 0x45E
@@ -138,6 +227,14 @@
#define A3XX_VSC_PIPE_CONFIG_7 0xC1B
#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
+#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
+#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
+#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
+#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
+#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
+#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
+#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
+#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
@@ -163,14 +260,42 @@
#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
+#define A3XX_RB_PERFCOUNTER0_SELECT 0xCC6
+#define A3XX_RB_PERFCOUNTER1_SELECT 0xCC7
+#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
+#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
+#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
+#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
+#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
+#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
+#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
+#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
+#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
+#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
+#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
+#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
+#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
+#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
+#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
+#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
+#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
+#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
+#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
+#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
+#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
+#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
+#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
+#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
+#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
@@ -232,12 +357,14 @@
#define A3XX_SP_VS_OUT_REG_7 0x22CE
#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
+#define A3XX_SP_VS_OBJ_START_REG 0x22D5
#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
#define A3XX_SP_VS_LENGTH_REG 0x22DF
#define A3XX_SP_FS_CTRL_REG0 0x22E0
#define A3XX_SP_FS_CTRL_REG1 0x22E1
#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
+#define A3XX_SP_FS_OBJ_START_REG 0x22E3
#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
@@ -271,8 +398,10 @@
#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
/* Bit flags for RBBM_CTL */
-#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1)
-#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (1 << 17)
+#define RBBM_RBBM_CTL_RESET_PWR_CTR0 BIT(0)
+#define RBBM_RBBM_CTL_RESET_PWR_CTR1 BIT(1)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR0 BIT(16)
+#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 BIT(17)
/* Various flags used by the context switch code */
@@ -537,7 +666,15 @@
#define RBBM_BLOCK_ID_MARB_3 0x2b
/* RBBM_CLOCK_CTL default value */
-#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF
+#define A305_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA
+#define A305C_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA
+#define A320_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF
+#define A330_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF
+#define A330v2_RBBM_CLOCK_CTL_DEFAULT 0xBFFCFFFF
+#define A305B_RBBM_CLOCK_CTL_DEFAULT 0xAAAAAAAA
+
+#define A330_RBBM_GPR0_CTL_DEFAULT 0x00000000
+#define A330v2_RBBM_GPR0_CTL_DEFAULT 0x00000000
/* COUNTABLE FOR SP PERFCOUNTER */
#define SP_FS_FULL_ALU_INSTRUCTIONS 0x0E
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 1886e04..62b6a71 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -17,7 +17,7 @@
#include <linux/sched.h>
#include <linux/of.h>
#include <linux/of_device.h>
-#include <linux/msm_kgsl.h>
+#include <linux/delay.h>
#include <mach/socinfo.h>
#include <mach/msm_bus_board.h>
@@ -30,6 +30,7 @@
#include "kgsl_cffdump.h"
#include "kgsl_sharedmem.h"
#include "kgsl_iommu.h"
+#include "kgsl_trace.h"
#include "adreno.h"
#include "adreno_pm4types.h"
@@ -99,6 +100,7 @@
.irq_name = KGSL_3D0_IRQ,
},
.iomemname = KGSL_3D0_REG_MEMORY,
+ .shadermemname = KGSL_3D0_SHADER_MEMORY,
.ftbl = &adreno_functable,
#ifdef CONFIG_HAS_EARLYSUSPEND
.display_off = {
@@ -166,10 +168,10 @@
/* size of gmem for gpu*/
unsigned int gmem_size;
/* version of pm4 microcode that supports sync_lock
- between CPU and GPU for SMMU-v1 programming */
+ between CPU and GPU for IOMMU-v0 programming */
unsigned int sync_lock_pm4_ver;
/* version of pfp microcode that supports sync_lock
- between CPU and GPU for SMMU-v1 programming */
+ between CPU and GPU for IOMMU-v0 programming */
unsigned int sync_lock_pfp_ver;
} adreno_gpulist[] = {
{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
@@ -198,18 +200,331 @@
"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
1536, 768, 3, SZ_512K, 0x225011, 0x225002 },
/* A3XX doesn't use the pix_shader_start */
- { ADRENO_REV_A305, 3, 0, 5, ANY_ID,
+ { ADRENO_REV_A305, 3, 0, 5, 0,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
512, 0, 2, SZ_256K, 0x3FF037, 0x3FF016 },
/* A3XX doesn't use the pix_shader_start */
{ ADRENO_REV_A320, 3, 2, ANY_ID, ANY_ID,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
- { ADRENO_REV_A330, 3, 3, 0, 0,
+ { ADRENO_REV_A330, 3, 3, 0, ANY_ID,
"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
512, 0, 2, SZ_1M, NO_VER, NO_VER },
+ { ADRENO_REV_A305B, 3, 0, 5, 0x10,
+ "a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
+ 512, 0, 2, SZ_128K, NO_VER, NO_VER },
+ { ADRENO_REV_A305C, 3, 0, 5, 0x20,
+ "a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
+ 512, 0, 2, SZ_128K, 0x3FF037, 0x3FF016 },
};
+/**
+ * adreno_perfcounter_init: Reserve kernel performance counters
+ * @device: device to configure
+ *
+ * The kernel needs a certain group of performance counters for its own
+ * activities. Reserve these counters at init time so that they are
+ * always available to the kernel. The performance counters used by the
+ * kernel can also be obtained by the user, but they will remain active
+ * as long as the device is alive.
+ */
+
+static void adreno_perfcounter_init(struct kgsl_device *device)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+ if (adreno_dev->gpudev->perfcounter_init)
+ adreno_dev->gpudev->perfcounter_init(adreno_dev);
+}
+
+/**
+ * adreno_perfcounter_start: Enable performance counters
+ * @adreno_dev: Adreno device to configure
+ *
+ * Ensure that all allocated performance counters are enabled. Since
+ * the device was most likely stopped, the counter state can't be
+ * trusted, so reprogram every allocated counter.
+ */
+
+static void adreno_perfcounter_start(struct adreno_device *adreno_dev)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i, j;
+
+ /* group id iter */
+ for (i = 0; i < counters->group_count; i++) {
+ group = &(counters->groups[i]);
+
+ /* countable iter */
+ for (j = 0; j < group->reg_count; j++) {
+ if (group->regs[j].countable ==
+ KGSL_PERFCOUNTER_NOT_USED)
+ continue;
+
+ if (adreno_dev->gpudev->perfcounter_enable)
+ adreno_dev->gpudev->perfcounter_enable(
+ adreno_dev, i, j,
+ group->regs[j].countable);
+ }
+ }
+}
+
+/**
+ * adreno_perfcounter_read_group: Read a list of performance counters
+ * @adreno_dev: Adreno device to configure
+ * @reads: List of kgsl_perfcounter_read_groups
+ * @count: Length of list
+ *
+ * Read the performance counters for the groupid/countable pairs and return
+ * the 64-bit result for each pair
+ */
+
+int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
+ struct kgsl_perfcounter_read_group *reads, unsigned int count)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ struct kgsl_perfcounter_read_group *list = NULL;
+ unsigned int i, j;
+ int ret = 0;
+
+ /* perfcounter get/put/query/read not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ /* sanity check for later */
+ if (!adreno_dev->gpudev->perfcounter_read)
+ return -EINVAL;
+
+ /* sanity check params passed in */
+ if (reads == NULL || count == 0 || count > 100)
+ return -EINVAL;
+
+ /* verify valid inputs group ids and countables */
+ for (i = 0; i < count; i++) {
+ if (reads[i].groupid >= counters->group_count)
+ return -EINVAL;
+ }
+
+ list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count,
+ GFP_KERNEL);
+ if (!list)
+ return -ENOMEM;
+
+ if (copy_from_user(list, reads,
+ sizeof(struct kgsl_perfcounter_read_group) * count)) {
+ ret = -EFAULT;
+ goto done;
+ }
+
+ /* list iterator */
+ for (j = 0; j < count; j++) {
+ list[j].value = 0;
+
+ group = &(counters->groups[list[j].groupid]);
+
+ /* group/counter iterator */
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == list[j].countable) {
+ list[j].value =
+ adreno_dev->gpudev->perfcounter_read(
+ adreno_dev, list[j].groupid,
+ i, group->regs[i].offset);
+ break;
+ }
+ }
+ }
+
+ /* write the data */
+ if (copy_to_user(reads, list,
+ sizeof(struct kgsl_perfcounter_read_group) *
+ count) != 0)
+ ret = -EFAULT;
+
+done:
+ kfree(list);
+ return ret;
+}
+
+/**
+ * adreno_perfcounter_query_group: Determine which countables are in counters
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countables: Return list of all countables in the group's counters
+ * @count: Max length of the array
+ * @max_counters: max counters for the groupid
+ *
+ * Query the current state of counters for the group.
+ */
+
+int adreno_perfcounter_query_group(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int *countables, unsigned int count,
+ unsigned int *max_counters)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i;
+
+ *max_counters = 0;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+ *max_counters = group->reg_count;
+
+ /*
+	 * if countables is NULL or count is zero, return max reg_count in
+ * *max_counters and return success
+ */
+ if (countables == NULL || count == 0)
+ return 0;
+
+ /*
+	 * Go through all available counters. Write up to count countable
+ * values.
+ */
+ for (i = 0; i < group->reg_count && i < count; i++) {
+ if (copy_to_user(&countables[i], &(group->regs[i].countable),
+ sizeof(unsigned int)) != 0)
+ return -EFAULT;
+ }
+
+ return 0;
+}
+
+/**
+ * adreno_perfcounter_get: Try to put a countable in an available counter
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be in a counter
+ * @offset: Return offset of the countable
+ * @flags: Used to setup kernel perf counters
+ *
+ * Try to place a countable in an available counter. If the countable is
+ * already in a counter, reference count the counter/countable pair resource
+ * and return success
+ */
+
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable, unsigned int *offset,
+ unsigned int flags)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+ unsigned int i, empty = -1;
+
+ /* always clear return variables */
+ if (offset)
+ *offset = 0;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+
+ /*
+ * Check if the countable is already associated with a counter.
+	 * If so, take a reference and return the offset; otherwise, try to
+	 * find an empty counter and assign the countable to it.
+ */
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == countable) {
+ /* Countable already associated with counter */
+ group->regs[i].refcount++;
+ group->regs[i].flags |= flags;
+ if (offset)
+ *offset = group->regs[i].offset;
+ return 0;
+ } else if (group->regs[i].countable ==
+ KGSL_PERFCOUNTER_NOT_USED) {
+ /* keep track of unused counter */
+ empty = i;
+ }
+ }
+
+ /* no available counters, so do nothing else */
+ if (empty == -1)
+ return -EBUSY;
+
+ /* initialize the new counter */
+ group->regs[empty].countable = countable;
+ group->regs[empty].refcount = 1;
+
+ /* enable the new counter */
+ adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty,
+ countable);
+
+ group->regs[empty].flags = flags;
+
+ if (offset)
+ *offset = group->regs[empty].offset;
+
+ return 0;
+}
+
+
+/**
+ * adreno_perfcounter_put: Release a countable from counter resource
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be freed from a counter
+ *
+ * Put a performance counter/countable pair that was previously obtained. If
+ * no one else is using the countable, free up the counter for others.
+ */
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable)
+{
+ struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+ struct adreno_perfcount_group *group;
+
+ unsigned int i;
+
+ /* perfcounter get/put/query not allowed on a2xx */
+ if (adreno_is_a2xx(adreno_dev))
+ return -EINVAL;
+
+ if (groupid >= counters->group_count)
+ return -EINVAL;
+
+ group = &(counters->groups[groupid]);
+
+ for (i = 0; i < group->reg_count; i++) {
+ if (group->regs[i].countable == countable) {
+ if (group->regs[i].refcount > 0) {
+ group->regs[i].refcount--;
+
+ /*
+				 * bookkeeping to ensure we never free a
+				 * perf counter used by the kernel
+ */
+ if (group->regs[i].flags &&
+ group->regs[i].refcount == 0)
+ group->regs[i].refcount++;
+
+ /* make available if not used */
+ if (group->regs[i].refcount == 0)
+ group->regs[i].countable =
+ KGSL_PERFCOUNTER_NOT_USED;
+ }
+
+ return 0;
+ }
+ }
+
+ return -EINVAL;
+}
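
Taken together, get/put implement a refcounted allocator over the physical counter registers. A minimal sketch of the lifecycle from a hypothetical in-kernel caller follows; KGSL_PERFCOUNTER_GROUP_SP is an assumed groupid from msm_kgsl.h, while SP_FS_FULL_ALU_INSTRUCTIONS is the countable defined in a3xx_reg.h above:

/* Hypothetical caller; the group name is assumed from msm_kgsl.h */
static int example_count_sp_alu(struct adreno_device *adreno_dev)
{
	unsigned int offset;
	int ret;

	/* Reserve (or refcount) a counter for the countable */
	ret = adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
			SP_FS_FULL_ALU_INSTRUCTIONS, &offset,
			PERFCOUNTER_FLAG_NONE);
	if (ret)
		return ret;

	/* ... sample the counter at 'offset' while the workload runs ... */

	/* Drop the reference; the counter is freed once unused */
	return adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
			SP_FS_FULL_ALU_INSTRUCTIONS);
}
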
+
static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
{
irqreturn_t result;
@@ -254,26 +569,29 @@
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
- result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc,
- GSL_PT_PAGE_RV);
+ result = kgsl_mmu_map_global(pagetable, &rb->buffer_desc);
if (result)
goto error;
- result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ result = kgsl_mmu_map_global(pagetable, &rb->memptrs_desc);
if (result)
goto unmap_buffer_desc;
- result = kgsl_mmu_map_global(pagetable, &device->memstore,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ result = kgsl_mmu_map_global(pagetable, &device->memstore);
if (result)
goto unmap_memptrs_desc;
- result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory);
if (result)
goto unmap_memstore_desc;
+ /*
+ * Set the mpu end to the last "normal" global memory we use.
+ * For the IOMMU, this will be used to restrict access to the
+ * mapped registers.
+ */
+ device->mh.mpu_range = device->mmu.setstate_memory.gpuaddr +
+ device->mmu.setstate_memory.size;
return result;
unmap_memstore_desc:
@@ -294,7 +612,7 @@
uint32_t flags)
{
unsigned int pt_val, reg_pt_val;
- unsigned int link[250];
+ unsigned int link[230];
unsigned int *cmds = &link[0];
int sizedwords = 0;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -302,8 +620,14 @@
struct kgsl_context *context;
struct adreno_context *adreno_ctx = NULL;
- if (!adreno_dev->drawctxt_active)
+ /*
+ * If we're idle and we don't need to use the GPU to save context
+ * state, use the CPU instead of the GPU to reprogram the
+	 * IOMMU for simplicity's sake.
+ */
+ if (!adreno_dev->drawctxt_active || device->ftbl->isidle(device))
return kgsl_mmu_device_setstate(&device->mmu, flags);
+
num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
context = idr_find(&device->context_idr, context_id);
@@ -427,7 +751,7 @@
adreno_dev->ringbuffer.timestamp[KGSL_MEMSTORE_GLOBAL], true);
}
- if (sizedwords > (sizeof(link)/sizeof(unsigned int))) {
+ if (sizedwords > (ARRAY_SIZE(link))) {
KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
BUG();
}
@@ -605,7 +929,7 @@
/* 8x25 returns 0 for minor id, but it should be 1 */
if (cpu_is_qsd8x50())
patchid = 1;
- else if (cpu_is_msm8625() && minorid == 0)
+ else if ((cpu_is_msm8625() || cpu_is_msm8625q()) && minorid == 0)
minorid = 1;
chipid |= (minorid << 8) | patchid;
@@ -669,7 +993,6 @@
adreno_dev->instruction_size = adreno_gpulist[i].instruction_size;
adreno_dev->gmem_size = adreno_gpulist[i].gmem_size;
adreno_dev->gpulist_index = i;
-
}
static struct platform_device_id adreno_id_table[] = {
@@ -756,6 +1079,10 @@
&pdata->init_level))
pdata->init_level = 1;
+ if (adreno_of_read_property(parent, "qcom,step-pwrlevel",
+ &pdata->step_mul))
+ pdata->step_mul = 1;
+
if (pdata->init_level < 0 || pdata->init_level > pdata->num_levels) {
KGSL_CORE_ERR("Initial power level out of range\n");
pdata->init_level = 1;
@@ -981,9 +1308,17 @@
goto err;
}
- if (adreno_of_read_property(child, "qcom,iommu-ctx-sids",
- &ctxs[ctx_index].ctx_id))
+ ret = of_property_read_u32_array(child, "reg", reg_val, 2);
+ if (ret) {
+ KGSL_CORE_ERR("Unable to read KGSL IOMMU 'reg'\n");
goto err;
+ }
+ if (msm_soc_version_supports_iommu_v0())
+ ctxs[ctx_index].ctx_id = (reg_val[0] -
+ data->physstart) >> KGSL_IOMMU_CTX_SHIFT;
+ else
+ ctxs[ctx_index].ctx_id = ((reg_val[0] -
+ data->physstart) >> KGSL_IOMMU_CTX_SHIFT) - 8;
ctx_index++;
}
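
A worked example of the new ctx_id derivation, under the assumption that KGSL_IOMMU_CTX_SHIFT is 12 (4 KB per context bank) and with a hypothetical data->physstart of 0x07C00000: a child node with reg = <0x07C02000 0x1000> yields ctx_id = (0x07C02000 - 0x07C00000) >> 12 = 2 on IOMMU-v0. On later IOMMU versions the same arithmetic is re-based by subtracting 8, which suggests the first eight banks of that register space are not available as KGSL contexts there.
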
@@ -1038,15 +1373,17 @@
if (ret)
goto err;
- /* Default value is 83, if not found in DT */
if (adreno_of_read_property(pdev->dev.of_node, "qcom,idle-timeout",
&pdata->idle_timeout))
- pdata->idle_timeout = 83;
+ pdata->idle_timeout = HZ/12;
if (adreno_of_read_property(pdev->dev.of_node, "qcom,nap-allowed",
&pdata->nap_allowed))
pdata->nap_allowed = 1;
+ pdata->strtstp_sleepwake = of_property_read_bool(pdev->dev.of_node,
+ "qcom,strtstp-sleepwake");
+
if (adreno_of_read_property(pdev->dev.of_node, "qcom,clk-map",
&pdata->clk_map))
goto err;
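
On the idle timeout: HZ/12 jiffies is one twelfth of a second, roughly 83 ms, so the new default presumably preserves the old hard-coded 83 (interpreted as milliseconds) while staying correct for any HZ setting.
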
@@ -1098,7 +1435,8 @@
static int
adreno_ocmem_gmem_malloc(struct adreno_device *adreno_dev)
{
- if (!adreno_is_a330(adreno_dev))
+ if (!(adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)))
return 0;
	/* OCMEM is only needed once, do not support consecutive allocation */
@@ -1119,7 +1457,8 @@
static void
adreno_ocmem_gmem_free(struct adreno_device *adreno_dev)
{
- if (!adreno_is_a330(adreno_dev))
+ if (!(adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)))
return;
if (adreno_dev->ocmem_hdl == NULL)
@@ -1202,10 +1541,10 @@
return 0;
}
-static int adreno_start(struct kgsl_device *device, unsigned int init_ram)
+static int adreno_init(struct kgsl_device *device)
{
- int status = -EINVAL;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
if (KGSL_STATE_DUMP_AND_FT != device->state)
kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
@@ -1231,10 +1570,9 @@
if (adreno_dev->gpurev == ADRENO_REV_UNKNOWN) {
KGSL_DRV_ERR(device, "Unknown chip ID %x\n",
adreno_dev->chip_id);
- goto error_clk_off;
+ BUG_ON(1);
}
-
/*
* Check if firmware supports the sync lock PM4 packets needed
* for IOMMUv1
@@ -1246,7 +1584,34 @@
adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver))
device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC;
- /* Set up the MMU */
+ rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0;
+
+ /* Assign correct RBBM status register to hang detect regs
+ */
+ ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
+
+ adreno_perfcounter_init(device);
+
+ /* Power down the device */
+ kgsl_pwrctrl_disable(device);
+
+ return 0;
+}
+
+static int adreno_start(struct kgsl_device *device)
+{
+ int status = -EINVAL;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+ kgsl_cffdump_open(device);
+
+ if (KGSL_STATE_DUMP_AND_FT != device->state)
+ kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+
+ /* Power up the device */
+ kgsl_pwrctrl_enable(device);
+
+ /* Set up a2xx special case */
if (adreno_is_a2xx(adreno_dev)) {
/*
* the MH_CLNT_INTF_CTRL_CONFIG registers aren't present
@@ -1260,20 +1625,6 @@
kgsl_mh_start(device);
}
- /* Assign correct RBBM status register to hang detect regs
- */
- ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
-
- /* Add A3XX specific registers for hang detection */
- if (adreno_is_a3xx(adreno_dev)) {
- ft_detect_regs[6] = A3XX_RBBM_PERFCTR_SP_7_LO;
- ft_detect_regs[7] = A3XX_RBBM_PERFCTR_SP_7_HI;
- ft_detect_regs[8] = A3XX_RBBM_PERFCTR_SP_6_LO;
- ft_detect_regs[9] = A3XX_RBBM_PERFCTR_SP_6_HI;
- ft_detect_regs[10] = A3XX_RBBM_PERFCTR_SP_5_LO;
- ft_detect_regs[11] = A3XX_RBBM_PERFCTR_SP_5_HI;
- }
-
status = kgsl_mmu_start(device);
if (status)
goto error_clk_off;
@@ -1290,22 +1641,30 @@
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
device->ftbl->irqctrl(device, 1);
- status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram);
- if (status == 0) {
- /* While fault tolerance is on we do not want timer to
- * fire and attempt to change any device state */
- if (KGSL_STATE_DUMP_AND_FT != device->state)
- mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
- return 0;
- }
+ status = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
+ if (status)
+ goto error_irq_off;
+	/* While fault tolerance is on we do not want the timer to
+ * fire and attempt to change any device state */
+ if (KGSL_STATE_DUMP_AND_FT != device->state)
+ mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
+
+ adreno_perfcounter_start(adreno_dev);
+
+ device->reset_counter++;
+
+ return 0;
+
+error_irq_off:
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
error_mmu_off:
kgsl_mmu_stop(&device->mmu);
error_clk_off:
- kgsl_pwrctrl_disable(device);
+ if (KGSL_STATE_DUMP_AND_FT != device->state)
+ kgsl_pwrctrl_disable(device);
return status;
}
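
The old monolithic start path is now split: adreno_init() performs the one-time work (chip identification, microcode version checks, kernel perfcounter reservation) and powers the core back down, while adreno_start() redoes the per-power-up work. The ordering the core is expected to follow can be read off _adreno_ft_restart_device() below; a minimal sketch:

/* sketch: bring-up ordering implied by this patch */
static int example_bringup(struct kgsl_device *device)
{
	int ret = device->ftbl->init(device);	/* probe + reserve, powers off */

	if (!ret)
		ret = device->ftbl->start(device); /* power on, MMU, ringbuffer */
	return ret;
}
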
@@ -1329,6 +1688,8 @@
/* Power down the device */
kgsl_pwrctrl_disable(device);
+ kgsl_cffdump_close(device->id);
+
return 0;
}
@@ -1411,6 +1772,8 @@
start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr,
size);
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr);
+ /* Ensure above read is finished before next read */
+ rmb();
if (KGSL_CMD_IDENTIFIER == val1) {
if ((start_ptr / sizeof(unsigned int)) != rb->wptr)
start_ptr = adreno_ringbuffer_dec_wrapped(
@@ -1448,6 +1811,8 @@
temp_rb_rptr, size);
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i],
temp_rb_rptr);
+ /* Ensure above read is finished before next read */
+ rmb();
if (check && ((inc && val[i] == global_eop) ||
(!inc && (val[i] ==
@@ -1512,6 +1877,8 @@
while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
+ /* Ensure above read is finished before next read */
+ rmb();
if (check && val[i] == ib1) {
/* decrement i, i.e i = (i - 1 + 2) % 2 */
@@ -1553,7 +1920,7 @@
return status;
}
-static int adreno_setup_ft_data(struct kgsl_device *device,
+static void adreno_setup_ft_data(struct kgsl_device *device,
struct adreno_ft_data *ft_data)
{
int ret = 0;
@@ -1578,30 +1945,30 @@
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
eoptimestamp));
+ /* Ensure context id and global eop ts read complete */
+ rmb();
+
ft_data->rb_buffer = vmalloc(rb->buffer_desc.size);
if (!ft_data->rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- return -ENOMEM;
+ return;
}
ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
if (!ft_data->bad_rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- ret = -ENOMEM;
- goto done;
+ return;
}
ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
if (!ft_data->good_rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
- ret = -ENOMEM;
- goto done;
+ return;
}
-
- ft_data->status = 0;
+ ft_data->status = 0;
/* find the start of bad command sequence in rb */
context = idr_find(&device->context_idr, ft_data->context_id);
@@ -1612,20 +1979,23 @@
* If there is no context then fault tolerance does not need to
* replay anything, just reset GPU and thats it
*/
- goto done;
+ return;
}
- ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
- ft_data->global_eop + 1, false);
- if (ret)
- goto done;
-
- ft_data->start_of_replay_cmds = rb_rptr;
-
- if (!adreno_dev->ft_policy)
- adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
ft_data->ft_policy = adreno_dev->ft_policy;
+ if (!ft_data->ft_policy)
+ ft_data->ft_policy = KGSL_FT_DEFAULT_POLICY;
+
+ ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
+ ft_data->global_eop + 1, false);
+ if (ret) {
+ ft_data->ft_policy |= KGSL_FT_TEMP_DISABLE;
+ return;
+ } else
+ ft_data->ft_policy &= ~KGSL_FT_TEMP_DISABLE;
+
+ ft_data->start_of_replay_cmds = rb_rptr;
adreno_context = context->devctxt;
if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
@@ -1636,20 +2006,12 @@
KGSL_FT_ERR(device,
"Start not found for replay IB sequence\n");
ret = 0;
- goto done;
+ return;
}
ft_data->start_of_replay_cmds = rb_rptr;
ft_data->replay_for_snapshot = rb_rptr;
}
}
-
-done:
- if (ret) {
- vfree(ft_data->rb_buffer);
- vfree(ft_data->bad_rb_buffer);
- vfree(ft_data->good_rb_buffer);
- }
- return ret;
}
static int
@@ -1663,6 +2025,8 @@
&curr_global_ts,
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
eoptimestamp));
+ /* Ensure above read is finished before long ib check */
+ rmb();
/* Mark long ib as handled */
adreno_dev->long_ib = 0;
@@ -1680,8 +2044,7 @@
static int
_adreno_ft_restart_device(struct kgsl_device *device,
- struct kgsl_context *context,
- struct adreno_ft_data *ft_data)
+ struct kgsl_context *context)
{
struct adreno_context *adreno_context = context->devctxt;
@@ -1692,7 +2055,12 @@
return 1;
}
- if (adreno_start(device, true)) {
+ if (adreno_init(device)) {
+ KGSL_FT_ERR(device, "Device init failed\n");
+ return 1;
+ }
+
+ if (adreno_start(device)) {
KGSL_FT_ERR(device, "Device start failed\n");
return 1;
}
@@ -1751,11 +2119,30 @@
unsigned int *buff, unsigned int size)
{
unsigned int ret = 0;
+ unsigned int retry_num = 0;
_adreno_debug_ft_info(device, ft_data);
- if (_adreno_ft_restart_device(device, context, ft_data))
- return 1;
+ do {
+ ret = _adreno_ft_restart_device(device, context);
+ if (ret == 0)
+ break;
+ /*
+		 * If the device restart fails, sleep for 20ms before
+		 * attempting another restart. This allows the GPU HW to
+		 * settle and improves the chances that the next restart
+		 * will be successful.
+ */
+ msleep(20);
+ KGSL_FT_ERR(device, "Retry device restart %d\n", retry_num);
+ retry_num++;
+ } while (retry_num < 4);
+
+ if (ret) {
+ KGSL_FT_ERR(device, "Device restart failed\n");
+ BUG_ON(1);
+ goto done;
+ }
if (size) {
@@ -1765,6 +2152,7 @@
ret = adreno_idle(device);
}
+done:
return ret;
}
@@ -1779,11 +2167,13 @@
struct kgsl_context *context;
struct adreno_context *adreno_context = NULL;
struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
+ unsigned int long_ib = 0;
context = idr_find(&device->context_idr, ft_data->context_id);
if (context == NULL) {
- KGSL_FT_CRIT(device, "Last context unknown id:%d\n",
+ KGSL_FT_ERR(device, "Last context unknown id:%d\n",
ft_data->context_id);
+ goto play_good_cmds;
} else {
adreno_context = context->devctxt;
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
@@ -1793,11 +2183,17 @@
*/
context->wait_on_invalid_ts = false;
+ if (!(adreno_context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
+ KGSL_FT_ERR(device, "Fault tolerance not supported\n");
+ goto play_good_cmds;
+ }
+
/*
* This flag will be set by userspace for contexts
* that do not want to be fault tolerant (ex: OPENCL)
*/
if (adreno_context->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE) {
+ ft_data->status = 1;
KGSL_FT_ERR(device,
"No FT set for this context play good cmds\n");
goto play_good_cmds;
@@ -1805,51 +2201,57 @@
}
+	/* Check if the detected long running IB is still running; if not, return */
+ if (adreno_dev->long_ib) {
+ long_ib = _adreno_check_long_ib(device);
+ if (!long_ib) {
+ adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
+ return 0;
+ }
+ }
+
/*
* Extract valid contents from rb which can still be executed after
* hang
*/
adreno_ringbuffer_extract(rb, ft_data);
- /* Check if we detected a long running IB,
- * if true do not attempt replay of bad cmds */
- if (adreno_dev->long_ib) {
- if (_adreno_check_long_ib(device)) {
- ft_data->status = 1;
- _adreno_debug_ft_info(device, ft_data);
- goto play_good_cmds;
- } else {
- adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
- return 0;
- }
- }
-
- /* Do not try the bad commands if hang is due to a fault */
- if (device->mmu.fault) {
- KGSL_FT_ERR(device, "MMU fault skipping bad cmds\n");
- device->mmu.fault = 0;
+ /* If long IB detected do not attempt replay of bad cmds */
+ if (long_ib) {
+ _adreno_debug_ft_info(device, ft_data);
goto play_good_cmds;
}
- if (ft_data->ft_policy & KGSL_FT_DISABLE) {
+ if ((ft_data->ft_policy & KGSL_FT_DISABLE) ||
+ (ft_data->ft_policy & KGSL_FT_TEMP_DISABLE)) {
KGSL_FT_ERR(device, "NO FT policy play only good cmds\n");
+ ft_data->status = 1;
goto play_good_cmds;
}
+	/* Do not try the replay if the hang is due to a pagefault */
+ if (adreno_context->pagefault) {
+ if ((ft_data->context_id == adreno_context->id) &&
+ (ft_data->global_eop == adreno_context->pagefault_ts)) {
+ ft_data->ft_policy &= ~KGSL_FT_REPLAY;
+ KGSL_FT_ERR(device, "MMU fault skipping replay\n");
+ }
+
+ adreno_context->pagefault = 0;
+ }
+
if (ft_data->ft_policy & KGSL_FT_REPLAY) {
-
ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "Replay unsuccessful\n");
+ KGSL_FT_ERR(device, "Replay status: 1\n");
ft_data->status = 1;
} else
goto play_good_cmds;
}
if (ft_data->ft_policy & KGSL_FT_SKIPIB) {
-
for (i = 0; i < ft_data->bad_rb_size; i++) {
if ((ft_data->bad_rb_buffer[i] ==
CP_HDR_INDIRECT_BUFFER_PFD) &&
@@ -1874,7 +2276,7 @@
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "NOP faulty IB unsuccessful\n");
+ KGSL_FT_ERR(device, "NOP faulty IB status: 1\n");
ft_data->status = 1;
} else {
ft_data->status = 0;
@@ -1883,7 +2285,6 @@
}
if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) {
-
for (i = 0; i < ft_data->bad_rb_size; i++) {
if (ft_data->bad_rb_buffer[i] ==
KGSL_END_OF_FRAME_IDENTIFIER) {
@@ -1905,7 +2306,7 @@
ft_data->bad_rb_buffer, ft_data->bad_rb_size);
if (ret) {
- KGSL_FT_ERR(device, "Skip EOF unsuccessful\n");
+ KGSL_FT_ERR(device, "Skip EOF status: 1\n");
ft_data->status = 1;
} else {
ft_data->status = 0;
@@ -1983,9 +2384,7 @@
/* setup new fault tolerance parameters and retry, this
* means more than 1 contexts are causing hang */
adreno_destroy_ft_data(ft_data);
- ret = adreno_setup_ft_data(device, ft_data);
- if (ret)
- goto done;
+ adreno_setup_ft_data(device, ft_data);
KGSL_FT_INFO(device,
"Retry. Parameters: "
"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
@@ -2050,7 +2449,12 @@
kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel);
/* Get the fault tolerance data as soon as hang is detected */
- result = adreno_setup_ft_data(device, &ft_data);
+ adreno_setup_ft_data(device, &ft_data);
+ /*
+ * Trigger an automatic dump of the state to
+ * the console
+ */
+ kgsl_postmortem_dump(device, 0);
/*
* If long ib is detected, do not attempt postmortem or
@@ -2072,10 +2476,8 @@
kgsl_device_snapshot(device, 1);
}
- if (!result) {
- result = adreno_ft(device, &ft_data);
- adreno_destroy_ft_data(&ft_data);
- }
+ result = adreno_ft(device, &ft_data);
+ adreno_destroy_ft_data(&ft_data);
/* restore power level */
kgsl_pwrctrl_pwrlevel_change(device, curr_pwrlevel);
@@ -2228,39 +2630,6 @@
status = 0;
}
break;
- case KGSL_PROP_FAULT_TOLERANCE: {
- struct kgsl_ft_config ftd;
-
- if (adreno_dev->ft_user_control == 0)
- break;
-
- if (sizebytes != sizeof(ftd))
- break;
-
- if (copy_from_user(&ftd, (void __user *) value,
- sizeof(ftd))) {
- status = -EFAULT;
- break;
- }
-
- if (ftd.ft_policy)
- adreno_dev->ft_policy = ftd.ft_policy;
- else
- adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
-
- if (ftd.ft_pf_policy)
- adreno_dev->ft_pf_policy = ftd.ft_policy;
- else
- adreno_dev->ft_pf_policy =
- KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
-
- if (ftd.ft_pm_dump)
- device->pm_dump_enable = 1;
- else
- device->pm_dump_enable = 0;
-
- }
- break;
default:
break;
}
@@ -2512,12 +2881,23 @@
return memdesc ? kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr) : NULL;
}
-void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
- unsigned int *value)
+/**
+ * adreno_read - General read function to read adreno device memory
+ * @device - Pointer to the GPU device struct (for adreno device)
+ * @base - Base address (kernel virtual) where the device memory is mapped
+ * @offsetwords - Offset in words from the base address of the memory
+ * to be read
+ * @value - Value read from the device memory
+ * @mem_len - Length of the device memory mapped to the kernel
+ */
+static void adreno_read(struct kgsl_device *device, void *base,
+ unsigned int offsetwords, unsigned int *value,
+ unsigned int mem_len)
{
+
unsigned int *reg;
- BUG_ON(offsetwords*sizeof(uint32_t) >= device->reg_len);
- reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
+ BUG_ON(offsetwords*sizeof(uint32_t) >= mem_len);
+ reg = (unsigned int *)(base + (offsetwords << 2));
if (!in_interrupt())
kgsl_pre_hwaccess(device);
@@ -2528,6 +2908,31 @@
rmb();
}
+/**
+ * adreno_regread - Used to read adreno device registers
+ * @offsetwords - Word (4 bytes) offset to the register to be read
+ * @value - Value read from device register
+ */
+void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
+ unsigned int *value)
+{
+ adreno_read(device, device->reg_virt, offsetwords, value,
+ device->reg_len);
+}
+
+/**
+ * adreno_shadermem_regread - Used to read GPU (adreno) shader memory
+ * @device - GPU device whose shader memory is to be read
+ * @offsetwords - Offset in words, of the shader memory address to be read
+ * @value - Pointer to where the read shader mem value is to be stored
+ */
+void adreno_shadermem_regread(struct kgsl_device *device,
+ unsigned int offsetwords, unsigned int *value)
+{
+ adreno_read(device, device->shader_mem_virt, offsetwords, value,
+ device->shader_mem_len);
+}
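
Both wrappers take word offsets; adreno_read() turns them into byte addresses with offsetwords << 2. For example, reading A3XX_CP_HW_FAULT (0x45C in the register list above) through adreno_regread() touches bytes 0x1170-0x1173 of the register mapping, and the BUG_ON rejects any offset whose byte address would fall outside mem_len.
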
+
void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
unsigned int value)
{
@@ -2538,6 +2943,8 @@
if (!in_interrupt())
kgsl_pre_hwaccess(device);
+ trace_kgsl_regwrite(device, offsetwords, value);
+
kgsl_cffdump_regwrite(device->id, offsetwords << 2, value);
reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
@@ -2612,6 +3019,7 @@
kgsl_sharedmem_writel(&device->memstore,
KGSL_MEMSTORE_OFFSET(context_id,
ts_cmp_enable), enableflag);
+
/* Make sure the memstore write gets posted */
wmb();
@@ -2621,9 +3029,10 @@
* get an interrupt
*/
- if (context && device->state != KGSL_STATE_SLUMBER)
+ if (context && device->state != KGSL_STATE_SLUMBER) {
adreno_ringbuffer_issuecmds(device, context->devctxt,
KGSL_CMD_FLAGS_NONE, NULL, 0);
+ }
}
return 0;
@@ -2687,10 +3096,13 @@
if (!adreno_dev->long_ib_detect)
long_ib_detected = 0;
+ if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED))
+ return 0;
+
if (is_adreno_rbbm_status_idle(device)) {
/*
- * On A20X if the RPTR != WPTR and the device is idle, then
+ * On A2XX if the RPTR != WPTR and the device is idle, then
* the last write to WPTR probably failed to latch so write it
* again
*/
@@ -2731,7 +3143,7 @@
&curr_global_ts,
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
eoptimestamp));
-
+ /* Make sure the memstore read has posted */
mb();
if (curr_global_ts == prev_global_ts) {
@@ -2742,6 +3154,8 @@
&curr_context_id,
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
current_context));
+ /* Make sure the memstore read has posted */
+ mb();
context = idr_find(&device->context_idr,
curr_context_id);
if (context != NULL) {
@@ -2753,8 +3167,6 @@
}
}
- mb();
-
if (curr_context != NULL) {
curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART;
@@ -2863,6 +3275,8 @@
ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
adreno_regread(device, REG_CP_RB_RPTR, &rptr);
+
+ /* Make sure timestamp check finished before triggering a hang */
mb();
KGSL_DRV_WARN(device,
@@ -3090,7 +3504,8 @@
break;
}
case KGSL_TIMESTAMP_CONSUMED:
-		adreno_regread(device, REG_CP_TIMESTAMP, &timestamp);
+		kgsl_sharedmem_readl(&device->memstore, &timestamp,
+ KGSL_MEMSTORE_OFFSET(context_id, soptimestamp));
break;
case KGSL_TIMESTAMP_RETIRED:
kgsl_sharedmem_readl(&device->memstore, &timestamp,
@@ -3106,27 +3521,55 @@
static long adreno_ioctl(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
+ struct kgsl_device *device = dev_priv->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
int result = 0;
- struct kgsl_drawctxt_set_bin_base_offset *binbase;
- struct kgsl_context *context;
switch (cmd) {
- case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET:
+ case IOCTL_KGSL_DRAWCTXT_SET_BIN_BASE_OFFSET: {
+ struct kgsl_drawctxt_set_bin_base_offset *binbase = data;
+ struct kgsl_context *context;
+
binbase = data;
context = kgsl_find_context(dev_priv, binbase->drawctxt_id);
if (context) {
adreno_drawctxt_set_bin_base_offset(
- dev_priv->device, context, binbase->offset);
+ device, context, binbase->offset);
} else {
result = -EINVAL;
- KGSL_DRV_ERR(dev_priv->device,
+ KGSL_DRV_ERR(device,
"invalid drawctxt drawctxt_id %d "
"device_id=%d\n",
- binbase->drawctxt_id, dev_priv->device->id);
+ binbase->drawctxt_id, device->id);
}
break;
-
+ }
+ case IOCTL_KGSL_PERFCOUNTER_GET: {
+ struct kgsl_perfcounter_get *get = data;
+ result = adreno_perfcounter_get(adreno_dev, get->groupid,
+ get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_PUT: {
+ struct kgsl_perfcounter_put *put = data;
+ result = adreno_perfcounter_put(adreno_dev, put->groupid,
+ put->countable);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_QUERY: {
+ struct kgsl_perfcounter_query *query = data;
+ result = adreno_perfcounter_query_group(adreno_dev,
+ query->groupid, query->countables,
+ query->count, &query->max_counters);
+ break;
+ }
+ case IOCTL_KGSL_PERFCOUNTER_READ: {
+ struct kgsl_perfcounter_read *read = data;
+ result = adreno_perfcounter_read_group(adreno_dev,
+ read->reads, read->count);
+ break;
+ }
default:
KGSL_DRV_INFO(dev_priv->device,
"invalid ioctl code %08x\n", cmd);
@@ -3203,6 +3646,7 @@
.idle = adreno_idle,
.isidle = adreno_isidle,
.suspend_context = adreno_suspend_context,
+ .init = adreno_init,
.start = adreno_start,
.stop = adreno_stop,
.getproperty = adreno_getproperty,
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index d319c98..90d6027 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -39,6 +39,7 @@
/* Command identifiers */
#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF
#define KGSL_CMD_IDENTIFIER 0x2EEDFACE
+#define KGSL_CMD_INTERNAL_IDENTIFIER 0x2EEDD00D
#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE
#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD
#define KGSL_END_OF_FRAME_IDENTIFIER 0x2E0F2E0F
@@ -72,8 +73,10 @@
ADRENO_REV_A220 = 220,
ADRENO_REV_A225 = 225,
ADRENO_REV_A305 = 305,
+ ADRENO_REV_A305C = 306,
ADRENO_REV_A320 = 320,
ADRENO_REV_A330 = 330,
+ ADRENO_REV_A305B = 335,
};
struct adreno_gpudev;
@@ -103,7 +106,6 @@
unsigned int ib_check_level;
unsigned int fast_hang_detect;
unsigned int ft_policy;
- unsigned int ft_user_control;
unsigned int long_ib_detect;
unsigned int long_ib;
unsigned int long_ib_ts;
@@ -111,6 +113,45 @@
unsigned int gpulist_index;
struct ocmem_buf *ocmem_hdl;
unsigned int ocmem_base;
+ unsigned int gpu_cycles;
+};
+
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @refcount: number of users of the register
+ * @offset: register hardware offset
+ */
+struct adreno_perfcount_register {
+ unsigned int countable;
+ unsigned int refcount;
+ unsigned int offset;
+ unsigned int flags;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ */
+struct adreno_perfcount_group {
+ struct adreno_perfcount_register *regs;
+ unsigned int reg_count;
+};
+
+/**
+ * adreno_perfcounts: all available perfcounter groups
+ * @groups: available groups for this device
+ * @group_count: total groups for this device
+ */
+struct adreno_perfcounters {
+ struct adreno_perfcount_group *groups;
+ unsigned int group_count;
};
struct adreno_gpudev {
@@ -124,6 +165,8 @@
/* keeps track of when we need to execute the draw workaround code */
int ctx_switches_since_last_draw;
+ struct adreno_perfcounters *perfcounters;
+
/* GPU specific function hooks */
int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
@@ -134,9 +177,15 @@
void (*irq_control)(struct adreno_device *, int);
unsigned int (*irq_pending)(struct adreno_device *);
void * (*snapshot)(struct adreno_device *, void *, int *, int);
- void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
+ int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
+ void (*perfcounter_init)(struct adreno_device *);
void (*start)(struct adreno_device *);
unsigned int (*busy_cycles)(struct adreno_device *);
+ void (*perfcounter_enable)(struct adreno_device *, unsigned int group,
+ unsigned int counter, unsigned int countable);
+ uint64_t (*perfcounter_read)(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter,
+ unsigned int offset);
};
/*
@@ -179,6 +228,22 @@
unsigned int replay_for_snapshot;
};
+/* Fault Tolerance policy flags */
+#define KGSL_FT_DISABLE BIT(0)
+#define KGSL_FT_REPLAY BIT(1)
+#define KGSL_FT_SKIPIB BIT(2)
+#define KGSL_FT_SKIPFRAME BIT(3)
+#define KGSL_FT_TEMP_DISABLE BIT(4)
+#define KGSL_FT_DEFAULT_POLICY (KGSL_FT_REPLAY + KGSL_FT_SKIPIB)
+
+/* Pagefault policy flags */
+#define KGSL_FT_PAGEFAULT_INT_ENABLE 0x00000001
+#define KGSL_FT_PAGEFAULT_GPUHALT_ENABLE 0x00000002
+#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE 0x00000004
+#define KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT 0x00000008
+#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY (KGSL_FT_PAGEFAULT_INT_ENABLE + \
+ KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+
extern struct adreno_gpudev adreno_a2xx_gpudev;
extern struct adreno_gpudev adreno_a3xx_gpudev;
@@ -210,7 +275,13 @@
void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
unsigned int value);
+void adreno_shadermem_regread(struct kgsl_device *device,
+ unsigned int offsetwords,
+ unsigned int *value);
+
int adreno_dump(struct kgsl_device *device, int manual);
+unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device
+ *adreno_dev);
struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
unsigned int pt_base,
@@ -234,6 +305,13 @@
unsigned int adreno_ft_detect(struct kgsl_device *device,
unsigned int *prev_reg_val);
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable, unsigned int *offset,
+ unsigned int flags);
+
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+ unsigned int groupid, unsigned int countable);
+
static inline int adreno_is_a200(struct adreno_device *adreno_dev)
{
return (adreno_dev->gpurev == ADRENO_REV_A200);
@@ -285,6 +363,16 @@
return (adreno_dev->gpurev == ADRENO_REV_A305);
}
+static inline int adreno_is_a305b(struct adreno_device *adreno_dev)
+{
+ return (adreno_dev->gpurev == ADRENO_REV_A305B);
+}
+
+static inline int adreno_is_a305c(struct adreno_device *adreno_dev)
+{
+ return (adreno_dev->gpurev == ADRENO_REV_A305C);
+}
+
static inline int adreno_is_a320(struct adreno_device *adreno_dev)
{
return (adreno_dev->gpurev == ADRENO_REV_A320);
@@ -295,6 +383,12 @@
return (adreno_dev->gpurev == ADRENO_REV_A330);
}
+static inline int adreno_is_a330v2(struct adreno_device *adreno_dev)
+{
+ return ((adreno_dev->gpurev == ADRENO_REV_A330) &&
+ (ADRENO_CHIPID_PATCH(adreno_dev->chip_id) > 0));
+}
+
static inline int adreno_rb_ctxtswitch(unsigned int *cmd)
{
return (cmd[0] == cp_nop_packet(1) &&
@@ -400,12 +494,13 @@
unsigned int *start = cmds;
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0;
if ((adreno_dev->gpurev == ADRENO_REV_A305) ||
+ (adreno_dev->gpurev == ADRENO_REV_A305C) ||
(adreno_dev->gpurev == ADRENO_REV_A320)) {
*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0;
}
return cmds - start;
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index ba4e507..dd9bdc3 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1515,18 +1515,26 @@
"Current active context has caused gpu hang\n");
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->reg_save[1],
+ context->reg_save[2] << 2, true);
/* save registers and constants. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->reg_save, 3);
if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->shader_save[1],
+ context->shader_save[2] << 2, true);
/* save shader partitioning and instructions. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->shader_save, 3);
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->shader_fixup[1],
+ context->shader_fixup[2] << 2, true);
/*
* fixup shader partitioning parameter for
* SET_SHADER_BASES.
@@ -1541,6 +1549,9 @@
if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
(context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->context_gmem_shadow.gmem_save[1],
+ context->context_gmem_shadow.gmem_save[2] << 2, true);
/* save gmem.
* (note: changes shader. shader must already be saved.)
*/
@@ -1548,6 +1559,10 @@
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.gmem_save, 3);
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->chicken_restore[1],
+ context->chicken_restore[2] << 2, true);
+
/* Restore TP0_CHICKEN */
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
adreno_ringbuffer_issuecmds(device, context,
@@ -1574,8 +1589,6 @@
return;
}
- KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
-
cmds[0] = cp_nop_packet(1);
cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
@@ -1586,21 +1599,24 @@
cmds, 5);
kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);
-#ifndef CONFIG_MSM_KGSL_CFF_DUMP_NO_CONTEXT_MEM_DUMP
- kgsl_cffdump_syncmem(NULL, &context->gpustate,
- context->gpustate.gpuaddr, LCC_SHADOW_SIZE +
- REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false);
-#endif
-
/* restore gmem.
* (note: changes shader. shader must not already be restored.)
*/
if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->context_gmem_shadow.gmem_restore[1],
+ context->context_gmem_shadow.gmem_restore[2] << 2,
+ true);
+
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.gmem_restore, 3);
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->chicken_restore[1],
+ context->chicken_restore[2] << 2, true);
+
/* Restore TP0_CHICKEN */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
@@ -1611,6 +1627,9 @@
}
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->reg_restore[1],
+ context->reg_restore[2] << 2, true);
/* restore registers and constants. */
adreno_ringbuffer_issuecmds(device, context,
@@ -1618,6 +1637,10 @@
/* restore shader instructions & partitioning. */
if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
+ kgsl_cffdump_syncmem(NULL, &context->gpustate,
+ context->shader_restore[1],
+ context->shader_restore[2] << 2, true);
+
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->shader_restore, 3);
@@ -1727,12 +1750,8 @@
adreno_regwrite(device, REG_CP_INT_ACK, status);
if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) {
- KGSL_CMD_WARN(rb->device, "ringbuffer ib1/rb interrupt\n");
queue_work(device->work_queue, &device->ts_expired_ws);
wake_up_interruptible_all(&device->wait_queue);
- atomic_notifier_call_chain(&(device->ts_notifier_list),
- device->id,
- NULL);
}
}
@@ -1821,23 +1840,26 @@
static unsigned int a2xx_irq_pending(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
- unsigned int rbbm, cp, mh;
+ unsigned int status;
- adreno_regread(device, REG_RBBM_INT_CNTL, &rbbm);
- adreno_regread(device, REG_CP_INT_CNTL, &cp);
- adreno_regread(device, MH_INTERRUPT_MASK, &mh);
+ adreno_regread(device, REG_MASTER_INT_SIGNAL, &status);
- return ((rbbm & RBBM_INT_MASK) || (cp & CP_INT_MASK) ||
- (mh & kgsl_mmu_get_int_mask())) ? 1 : 0;
+ return (status &
+ (MASTER_INT_SIGNAL__MH_INT_STAT |
+ MASTER_INT_SIGNAL__CP_INT_STAT |
+ MASTER_INT_SIGNAL__RBBM_INT_STAT)) ? 1 : 0;
}
-static void a2xx_rb_init(struct adreno_device *adreno_dev,
+static int a2xx_rb_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
unsigned int *cmds, cmds_gpu;
/* ME_INIT */
cmds = adreno_ringbuffer_allocspace(rb, NULL, 19);
+ if (cmds == NULL)
+ return -ENOMEM;
+
cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19);
GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18));
@@ -1890,6 +1912,8 @@
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
adreno_ringbuffer_submit(rb);
+
+ return 0;
}
static unsigned int a2xx_busy_cycles(struct adreno_device *adreno_dev)
diff --git a/drivers/gpu/msm/adreno_a2xx_snapshot.c b/drivers/gpu/msm/adreno_a2xx_snapshot.c
index 75795b1..2c86f82 100644
--- a/drivers/gpu/msm/adreno_a2xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a2xx_snapshot.c
@@ -224,6 +224,31 @@
return DEBUG_SECTION_SZ(MIUDEBUG_COUNT);
}
+/* Snapshot the istore memory */
+static int a2xx_snapshot_istore(struct kgsl_device *device, void *snapshot,
+ int remain, void *priv)
+{
+ struct kgsl_snapshot_istore *header = snapshot;
+ unsigned int *data = snapshot + sizeof(*header);
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ int count, i;
+
+ count = adreno_dev->istore_size * adreno_dev->instruction_size;
+
+ if (remain < (count * 4) + sizeof(*header)) {
+ KGSL_DRV_ERR(device,
+ "snapshot: Not enough memory for the istore section");
+ return 0;
+ }
+
+ header->count = adreno_dev->istore_size;
+
+ for (i = 0; i < count; i++)
+ kgsl_regread(device, ADRENO_ISTORE_START + i, &data[i]);
+
+ return (count * 4) + sizeof(*header);
+}
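
For scale, an A225 has istore_size = 1536 and instruction_size = 3 in the gpulist above, so the loop reads count = 1536 * 3 = 4608 words and the section needs 4608 * 4 bytes plus the header, roughly 18 KB; the remain check refuses to emit a truncated section rather than clip it.
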
+
/* A2XX GPU snapshot function - this is where all of the A2XX specific
* bits and pieces are grabbed into the snapshot memory
*/
@@ -338,6 +363,18 @@
}
}
+ /*
+ * Only dump the istore on a hang - reading it on a running system
+	 * has a non-zero chance of hanging the GPU.
+ */
+
+ if (adreno_is_a2xx(adreno_dev) && hang) {
+ snapshot = kgsl_snapshot_add_section(device,
+ KGSL_SNAPSHOT_SECTION_ISTORE, snapshot, remain,
+ a2xx_snapshot_istore, NULL);
+ }
+
+
/* Reset the clock gating */
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, pmoverride);
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 3d9ec6d..13c723a 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -52,8 +52,8 @@
0x2240, 0x227e,
0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
- 0x22ff, 0x22ff, 0x2340, 0x2343, 0x2348, 0x2349, 0x2350, 0x2356,
- 0x2360, 0x2360, 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
+ 0x22ff, 0x22ff, 0x2340, 0x2343,
+ 0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
@@ -61,8 +61,8 @@
0x25f0, 0x25f0,
0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
- 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743, 0x2748, 0x2749,
- 0x2750, 0x2756, 0x2760, 0x2760, 0x300C, 0x300E, 0x301C, 0x301D,
+ 0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
+ 0x300C, 0x300E, 0x301C, 0x301D,
0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
0x303C, 0x303C, 0x305E, 0x305F,
};
@@ -445,6 +445,25 @@
tmp_ctx.cmd = cmd;
}
+unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device
+ *adreno_dev)
+{
+ if (adreno_is_a305(adreno_dev))
+ return A305_RBBM_CLOCK_CTL_DEFAULT;
+ else if (adreno_is_a305c(adreno_dev))
+ return A305C_RBBM_CLOCK_CTL_DEFAULT;
+ else if (adreno_is_a320(adreno_dev))
+ return A320_RBBM_CLOCK_CTL_DEFAULT;
+ else if (adreno_is_a330v2(adreno_dev))
+ return A330v2_RBBM_CLOCK_CTL_DEFAULT;
+ else if (adreno_is_a330(adreno_dev))
+ return A330_RBBM_CLOCK_CTL_DEFAULT;
+ else if (adreno_is_a305b(adreno_dev))
+ return A305B_RBBM_CLOCK_CTL_DEFAULT;
+
+ BUG_ON(1);
+}
+
/* Copy GMEM contents to system memory shadow. */
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
@@ -454,7 +473,7 @@
unsigned int *start = cmds;
*cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
- *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
+ *cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
@@ -1250,7 +1269,7 @@
unsigned int *start = cmds;
*cmds++ = cp_type0_packet(A3XX_RBBM_CLOCK_CTL, 1);
- *cmds++ = A3XX_RBBM_CLOCK_CTL_DEFAULT;
+ *cmds++ = adreno_a3xx_rbbm_clock_ctl_default(adreno_dev);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
@@ -2400,6 +2419,11 @@
* already be saved.)
*/
+ kgsl_cffdump_syncmem(NULL,
+ &context->gpustate,
+ context->context_gmem_shadow.gmem_save[1],
+ context->context_gmem_shadow.gmem_save[2] << 2, true);
+
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.
@@ -2421,8 +2445,6 @@
return;
}
- KGSL_CTXT_INFO(device, "context flags %08x\n", context->flags);
-
cmds[0] = cp_nop_packet(1);
cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
@@ -2439,6 +2461,12 @@
*/
if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+ kgsl_cffdump_syncmem(NULL,
+ &context->gpustate,
+ context->context_gmem_shadow.gmem_restore[1],
+ context->context_gmem_shadow.gmem_restore[2] << 2,
+ true);
+
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.
@@ -2471,11 +2499,14 @@
}
}
-static void a3xx_rb_init(struct adreno_device *adreno_dev,
+static int a3xx_rb_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
unsigned int *cmds, cmds_gpu;
cmds = adreno_ringbuffer_allocspace(rb, NULL, 18);
+ if (cmds == NULL)
+ return -ENOMEM;
+
cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
@@ -2499,6 +2530,8 @@
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
adreno_ringbuffer_submit(rb);
+
+ return 0;
}
static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
@@ -2581,9 +2614,213 @@
/* Schedule work to free mem and issue ibs */
queue_work(device->work_queue, &device->ts_expired_ws);
+}
- atomic_notifier_call_chain(&device->ts_notifier_list,
- device->id, NULL);
+/**
+ * struct a3xx_perfcounter_register - Define a performance counter register
+ * @load_bit: the bit to set in RBBM_LOAD_CMD0/RBBM_LOAD_CMD1 to force the RBBM
+ * to load the reset value into the appropriate counter
+ * @select: The dword offset of the register to write the selected
+ * countable into
+ */
+
+struct a3xx_perfcounter_register {
+ unsigned int load_bit;
+ unsigned int select;
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_cp[] = {
+ { 0, A3XX_CP_PERFCOUNTER_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rbbm[] = {
+ { 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+ { 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_pc[] = {
+ { 3, A3XX_PC_PERFCOUNTER0_SELECT },
+ { 4, A3XX_PC_PERFCOUNTER1_SELECT },
+ { 5, A3XX_PC_PERFCOUNTER2_SELECT },
+ { 6, A3XX_PC_PERFCOUNTER3_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vfd[] = {
+ { 7, A3XX_VFD_PERFCOUNTER0_SELECT },
+ { 8, A3XX_VFD_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_hlsq[] = {
+ { 9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
+ { 10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
+ { 11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
+ { 12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
+ { 13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
+ { 14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vpc[] = {
+ { 15, A3XX_VPC_PERFCOUNTER0_SELECT },
+ { 16, A3XX_VPC_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tse[] = {
+ { 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+ { 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_ras[] = {
+ { 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+ { 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_uche[] = {
+ { 21, A3XX_UCHE_PERFCOUNTER0_SELECT },
+ { 22, A3XX_UCHE_PERFCOUNTER1_SELECT },
+ { 23, A3XX_UCHE_PERFCOUNTER2_SELECT },
+ { 24, A3XX_UCHE_PERFCOUNTER3_SELECT },
+ { 25, A3XX_UCHE_PERFCOUNTER4_SELECT },
+ { 26, A3XX_UCHE_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tp[] = {
+ { 27, A3XX_TP_PERFCOUNTER0_SELECT },
+ { 28, A3XX_TP_PERFCOUNTER1_SELECT },
+ { 29, A3XX_TP_PERFCOUNTER2_SELECT },
+ { 30, A3XX_TP_PERFCOUNTER3_SELECT },
+ { 31, A3XX_TP_PERFCOUNTER4_SELECT },
+ { 32, A3XX_TP_PERFCOUNTER5_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_sp[] = {
+ { 33, A3XX_SP_PERFCOUNTER0_SELECT },
+ { 34, A3XX_SP_PERFCOUNTER1_SELECT },
+ { 35, A3XX_SP_PERFCOUNTER2_SELECT },
+ { 36, A3XX_SP_PERFCOUNTER3_SELECT },
+ { 37, A3XX_SP_PERFCOUNTER4_SELECT },
+ { 38, A3XX_SP_PERFCOUNTER5_SELECT },
+ { 39, A3XX_SP_PERFCOUNTER6_SELECT },
+ { 40, A3XX_SP_PERFCOUNTER7_SELECT },
+};
+
+static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rb[] = {
+ { 41, A3XX_RB_PERFCOUNTER0_SELECT },
+ { 42, A3XX_RB_PERFCOUNTER1_SELECT },
+};
+
+#define REGCOUNTER_GROUP(_x) { (_x), ARRAY_SIZE((_x)) }
+
+static struct {
+ struct a3xx_perfcounter_register *regs;
+ int count;
+} a3xx_perfcounter_reglist[] = {
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_cp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_rbbm),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_pc),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_vfd),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_hlsq),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_vpc),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_tse),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_ras),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_uche),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_tp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_sp),
+ REGCOUNTER_GROUP(a3xx_perfcounter_reg_rb),
+};
+
+static void a3xx_perfcounter_enable_pwr(struct kgsl_device *device,
+ unsigned int countable)
+{
+ unsigned int in, out;
+
+ adreno_regread(device, A3XX_RBBM_RBBM_CTL, &in);
+
+ if (countable == 0)
+ out = in | RBBM_RBBM_CTL_RESET_PWR_CTR0;
+ else
+ out = in | RBBM_RBBM_CTL_RESET_PWR_CTR1;
+
+ adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
+
+ if (countable == 0)
+ out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR0;
+ else
+ out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
+
+	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
+}
+
+/*
+ * a3xx_perfcounter_enable - Configure a performance counter for a countable
+ * @adreno_dev - Adreno device to configure
+ * @group - Desired performance counter group
+ * @counter - Desired performance counter in the group
+ * @countable - Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable
+ */
+
+static void a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter, unsigned int countable)
+{
+ struct kgsl_device *device = &adreno_dev->dev;
+ unsigned int val = 0;
+ struct a3xx_perfcounter_register *reg;
+
+	/* Special case - power */
+	if (group == KGSL_PERFCOUNTER_GROUP_PWR)
+		return a3xx_perfcounter_enable_pwr(device, countable);
+
+	if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
+		return;
+
+	if (counter >= a3xx_perfcounter_reglist[group].count)
+		return;
+
+ reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+
+ /* Select the desired perfcounter */
+ adreno_regwrite(device, reg->select, countable);
+
+ if (reg->load_bit < 32) {
+ val = 1 << reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val);
+ } else {
+ val = 1 << (reg->load_bit - 32);
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val);
+ }
+}
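
A worked example of the load-bit split above (a standalone sketch, not driver
code): load bits 0-31 select bits in RBBM_PERFCTR_LOAD_CMD0 and bits 32 and up
map into LOAD_CMD1, so the second RB counter (load_bit 42 in the table)
resolves to bit 10 of LOAD_CMD1:

#include <stdio.h>

/* Mirror of the CMD0/CMD1 selection in a3xx_perfcounter_enable;
 * the printf stands in for adreno_regwrite(). */
static void show_load_cmd(unsigned int load_bit)
{
	if (load_bit < 32)
		printf("LOAD_CMD0 <- 0x%08x\n", 1u << load_bit);
	else
		printf("LOAD_CMD1 <- 0x%08x\n", 1u << (load_bit - 32));
}

int main(void)
{
	show_load_cmd(0);	/* CP counter 0:  CMD0 bit 0  */
	show_load_cmd(42);	/* RB counter 1:  CMD1 bit 10 */
	return 0;
}
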
+
+static uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
+ unsigned int group, unsigned int counter,
+ unsigned int offset)
+{
+ struct kgsl_device *device = &adreno_dev->dev;
+ struct a3xx_perfcounter_register *reg = NULL;
+ unsigned int lo = 0, hi = 0;
+ unsigned int val;
+
+	if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
+		return 0;
+
+	if (counter >= a3xx_perfcounter_reglist[group].count)
+		return 0;
+
+ reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+
+ /* Freeze the counter */
+ adreno_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
+ val &= ~reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+
+ /* Read the values */
+ adreno_regread(device, offset, &lo);
+ adreno_regread(device, offset + 1, &hi);
+
+ /* Re-Enable the counter */
+ val |= reg->load_bit;
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+
+ return (((uint64_t) hi) << 32) | lo;
}
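
The read path above freezes the counter before sampling because the 64-bit
value is exposed as two 32-bit registers (LO at `offset`, HI at `offset + 1`);
without the freeze, LO could wrap between the two reads and tear the sample.
The combine step itself is just (a minimal sketch):

#include <stdint.h>

/* Join a LO/HI register pair into one 64-bit counter value, as the
 * final statement of a3xx_perfcounter_read does. */
static uint64_t combine_counter(uint32_t lo, uint32_t hi)
{
	return ((uint64_t)hi << 32) | lo;
}
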
#define A3XX_IRQ_CALLBACK(_c) { .func = _c }
@@ -2687,26 +2924,22 @@
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
- unsigned int reg, val;
-
- /* Freeze the counter */
-	adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
- reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
- adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
+ unsigned int val;
+ unsigned int ret = 0;
/* Read the value */
adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
- /* Reset the counter */
- reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
- adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
+ /* Return 0 for the first read */
+ if (adreno_dev->gpu_cycles != 0) {
+ if (val < adreno_dev->gpu_cycles)
+ ret = (0xFFFFFFFF - adreno_dev->gpu_cycles) + val;
+ else
+ ret = val - adreno_dev->gpu_cycles;
+ }
- /* Re-enable the counter */
- reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
- reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
- adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
-
- return val;
+ adreno_dev->gpu_cycles = val;
+ return ret;
}
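
The reworked a3xx_busy_cycles() now samples the free-running PWR_1 counter and
returns the delta since the previous sample, compensating for 32-bit
wraparound. A standalone sketch of the delta logic:

#include <stdint.h>

/*
 * Delta between two samples of a free-running 32-bit counter,
 * mirroring the wrap handling above. Note that plain unsigned
 * subtraction (now - prev) handles the wrap implicitly and avoids
 * the one-cycle undercount of the explicit (0xFFFFFFFF - prev) form.
 */
static uint32_t counter_delta(uint32_t prev, uint32_t now)
{
	if (now < prev)
		return (0xFFFFFFFFu - prev) + now;
	return now - prev;
}
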
struct a3xx_vbif_data {
@@ -2734,6 +2967,29 @@
{0, 0},
};
+static struct a3xx_vbif_data a305b_vbif[] = {
+ { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818 },
+ { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818 },
+ { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018 },
+ { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018 },
+ { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303 },
+ { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+ {0, 0},
+};
+
+static struct a3xx_vbif_data a305c_vbif[] = {
+ { A3XX_VBIF_IN_RD_LIM_CONF0, 0x00101010 },
+ { A3XX_VBIF_IN_WR_LIM_CONF0, 0x00101010 },
+ { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 },
+ { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 },
+ { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000101 },
+ { A3XX_VBIF_ARB_CTL, 0x00000010 },
+ /* Set up AOOO */
+ { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x00000007 },
+ { A3XX_VBIF_OUT_AXI_AOOO, 0x00070007 },
+ {0, 0},
+};
+
static struct a3xx_vbif_data a320_vbif[] = {
/* Set up 16 deep read/write request queues */
{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
@@ -2784,17 +3040,81 @@
{0, 0},
};
+/*
+ * Most of the VBIF registers on 8974v2 have the correct values at power on, so
+ * we won't modify those if we don't need to
+ */
+static struct a3xx_vbif_data a330v2_vbif[] = {
+ /* Enable 1k sort */
+ { A3XX_VBIF_ABIT_SORT, 0x0001003F },
+ { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+ /* Enable WR-REQ */
+ { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
+ { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+ /* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+ { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+ {0, 0},
+};
+
+static struct {
+	int (*devfunc)(struct adreno_device *);
+ struct a3xx_vbif_data *vbif;
+} a3xx_vbif_platforms[] = {
+ { adreno_is_a305, a305_vbif },
+ { adreno_is_a305c, a305c_vbif },
+ { adreno_is_a320, a320_vbif },
+ /* A330v2 needs to be ahead of A330 so the right device matches */
+ { adreno_is_a330v2, a330v2_vbif },
+ { adreno_is_a330, a330_vbif },
+ { adreno_is_a305b, a305b_vbif },
+};
+
+static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
+{
+ /*
+	 * Set SP to count SP_ALU_ACTIVE_CYCLES; it includes
+	 * all ALU instruction execution regardless of precision or shader ID.
+	 * Set SP to count SP0_ICL1_MISSES; it counts
+	 * USP L1 instruction miss requests.
+	 * Set SP to count SP_FS_CFLOW_INSTRUCTIONS; it counts
+	 * USP flow control instruction execution.
+	 * We will use these to augment our hang detection.
+ */
+ if (adreno_dev->fast_hang_detect) {
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[7] = ft_detect_regs[6] + 1;
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP0_ICL1_MISSES, &ft_detect_regs[8],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[9] = ft_detect_regs[8] + 1;
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10],
+ PERFCOUNTER_FLAG_KERNEL);
+ ft_detect_regs[11] = ft_detect_regs[10] + 1;
+ }
+
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+ SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL);
+
+ /* Reserve and start countable 1 in the PWR perfcounter group */
+ adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+ NULL, PERFCOUNTER_FLAG_KERNEL);
+}
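
adreno_perfcounter_get() hands back the dword offset of the reserved counter's
LO register, and the code above assumes the HI half sits at the next dword
offset, which is why each ft_detect_regs slot is paired with slot + 1. The
fast-hang-detect idea those registers feed is, in outline, the following
(a hedged sketch; the real comparison lives in the fault-tolerance code, and
read_reg() stands in for adreno_regread()):

#include <stdbool.h>

/* If none of the watched counters moved between two samples while
 * the GPU claims to be busy, suspect a hang. */
static bool counters_stalled(const unsigned int regs[],
			     unsigned int prev[], unsigned int n,
			     unsigned int (*read_reg)(unsigned int))
{
	bool moved = false;
	unsigned int i;

	for (i = 0; i < n; i++) {
		unsigned int v = read_reg(regs[i]);

		if (v != prev[i])
			moved = true;
		prev[i] = v;
	}
	return !moved;
}
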
+
static void a3xx_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
struct a3xx_vbif_data *vbif = NULL;
+ int i;
- if (adreno_is_a305(adreno_dev))
- vbif = a305_vbif;
- else if (adreno_is_a320(adreno_dev))
- vbif = a320_vbif;
- else if (adreno_is_a330(adreno_dev))
- vbif = a330_vbif;
+ for (i = 0; i < ARRAY_SIZE(a3xx_vbif_platforms); i++) {
+ if (a3xx_vbif_platforms[i].devfunc(adreno_dev)) {
+ vbif = a3xx_vbif_platforms[i].vbif;
+ break;
+ }
+ }
BUG_ON(vbif == NULL);
@@ -2832,10 +3152,18 @@
/* Enable Clock gating */
adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
- A3XX_RBBM_CLOCK_CTL_DEFAULT);
+ adreno_a3xx_rbbm_clock_ctl_default(adreno_dev));
+
+ if (adreno_is_a330v2(adreno_dev))
+ adreno_regwrite(device, A3XX_RBBM_GPR0_CTL,
+ A330v2_RBBM_GPR0_CTL_DEFAULT);
+ else if (adreno_is_a330(adreno_dev))
+ adreno_regwrite(device, A3XX_RBBM_GPR0_CTL,
+ A330_RBBM_GPR0_CTL_DEFAULT);
/* Set the OCMEM base address for A330 */
- if (adreno_is_a330(adreno_dev)) {
+ if (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)) {
adreno_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
(unsigned int)(adreno_dev->ocmem_base >> 14));
}
@@ -2843,25 +3171,121 @@
/* Turn on performance counters */
adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
- /*
- * Set SP perfcounter 5 to count SP_ALU_ACTIVE_CYCLES, it includes
- * all ALU instruction execution regardless precision or shader ID.
- * Set SP perfcounter 6 to count SP0_ICL1_MISSES, It counts
- * USP L1 instruction miss request.
- * Set SP perfcounter 7 to count SP_FS_FULL_ALU_INSTRUCTIONS, it
- * counts USP flow control instruction execution.
- * we will use this to augment our hang detection
- */
- if (adreno_dev->fast_hang_detect) {
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER5_SELECT,
- SP_ALU_ACTIVE_CYCLES);
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER6_SELECT,
- SP0_ICL1_MISSES);
- adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
- SP_FS_CFLOW_INSTRUCTIONS);
- }
+ /* Turn on the GPU busy counter and let it run free */
+
+ adreno_dev->gpu_cycles = 0;
}
+/*
+ * Define the available perfcounter groups - these get used by
+ * adreno_perfcounter_get and adreno_perfcounter_put
+ */
+
+static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_CP_0_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PC_3_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VFD_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_VPC_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TSE_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RAS_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_TP_5_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_1_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_2_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_3_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_4_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_5_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_6_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_SP_7_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_RB_1_LO, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_0_LO, 0 },
+ { KGSL_PERFCOUNTER_NOT_USED, 0, A3XX_RBBM_PERFCTR_PWR_1_LO, 0 },
+};
+
+static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
+ { a3xx_perfcounters_cp, ARRAY_SIZE(a3xx_perfcounters_cp) },
+ { a3xx_perfcounters_rbbm, ARRAY_SIZE(a3xx_perfcounters_rbbm) },
+ { a3xx_perfcounters_pc, ARRAY_SIZE(a3xx_perfcounters_pc) },
+ { a3xx_perfcounters_vfd, ARRAY_SIZE(a3xx_perfcounters_vfd) },
+ { a3xx_perfcounters_hlsq, ARRAY_SIZE(a3xx_perfcounters_hlsq) },
+ { a3xx_perfcounters_vpc, ARRAY_SIZE(a3xx_perfcounters_vpc) },
+ { a3xx_perfcounters_tse, ARRAY_SIZE(a3xx_perfcounters_tse) },
+ { a3xx_perfcounters_ras, ARRAY_SIZE(a3xx_perfcounters_ras) },
+ { a3xx_perfcounters_uche, ARRAY_SIZE(a3xx_perfcounters_uche) },
+ { a3xx_perfcounters_tp, ARRAY_SIZE(a3xx_perfcounters_tp) },
+ { a3xx_perfcounters_sp, ARRAY_SIZE(a3xx_perfcounters_sp) },
+ { a3xx_perfcounters_rb, ARRAY_SIZE(a3xx_perfcounters_rb) },
+ { a3xx_perfcounters_pwr, ARRAY_SIZE(a3xx_perfcounters_pwr) },
+};
+
+static struct adreno_perfcounters a3xx_perfcounters = {
+ a3xx_perfcounter_groups,
+ ARRAY_SIZE(a3xx_perfcounter_groups),
+};
+
/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang);
@@ -2870,16 +3294,20 @@
.reg_rbbm_status = A3XX_RBBM_STATUS,
.reg_cp_pfp_ucode_addr = A3XX_CP_PFP_UCODE_ADDR,
.reg_cp_pfp_ucode_data = A3XX_CP_PFP_UCODE_DATA,
+ .perfcounters = &a3xx_perfcounters,
.ctxt_create = a3xx_drawctxt_create,
.ctxt_save = a3xx_drawctxt_save,
.ctxt_restore = a3xx_drawctxt_restore,
.ctxt_draw_workaround = NULL,
.rb_init = a3xx_rb_init,
+ .perfcounter_init = a3xx_perfcounter_init,
.irq_control = a3xx_irq_control,
.irq_handler = a3xx_irq_handler,
.irq_pending = a3xx_irq_pending,
.busy_cycles = a3xx_busy_cycles,
.start = a3xx_start,
.snapshot = a3xx_snapshot,
+ .perfcounter_enable = a3xx_perfcounter_enable,
+ .perfcounter_read = a3xx_perfcounter_read,
};
diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c
index d9d5ec8..58e3126 100644
--- a/drivers/gpu/msm/adreno_a3xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -11,6 +11,7 @@
*
*/
+#include <linux/io.h>
#include "kgsl.h"
#include "adreno.h"
#include "kgsl_snapshot.h"
@@ -19,14 +20,43 @@
#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
+ sizeof(struct kgsl_snapshot_debug))
+/* Shader memory size in words */
#define SHADER_MEMORY_SIZE 0x4000
+/**
+ * _rbbm_debug_bus_read - Helper function to read data from the RBBM
+ * debug bus.
+ * @device - GPU device to read/write registers
+ * @block_id - Debug bus block to read from
+ * @index - Index in the debug bus block to read
+ * @val - Pointer filled with the value read from the debug bus
+ */
+static void _rbbm_debug_bus_read(struct kgsl_device *device,
+ unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int block = (block_id << 8) | (1 << 16);
+ adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index);
+ adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
+}
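
A worked example of the control-word packing used above: the index occupies
the low byte, the block id sits in bits [15:8], and bit 16 appears to act as
an enable. The block id value below is illustrative only, not the real
RBBM_BLOCK_ID_HLSQ constant:

#include <stdio.h>

static unsigned int debug_bus_ctl(unsigned int block_id,
				  unsigned int index)
{
	return (block_id << 8) | (1u << 16) | index;
}

int main(void)
{
	/* e.g. reading the HLSQ stall status word at index 49 */
	printf("CTL = 0x%08x\n", debug_bus_ctl(0x07 /* assumed id */, 49));
	return 0;
}
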
+
+/**
+ * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader
+ * memory to the snapshot buffer.
+ * @device - GPU device whose shader memory is to be dumped
+ * @snapshot - Pointer to binary snapshot data blob being made
+ * @remain - Number of remaining bytes in the snapshot blob
+ * @priv - Unused parameter
+ */
static int a3xx_snapshot_shader_memory(struct kgsl_device *device,
void *snapshot, int remain, void *priv)
{
struct kgsl_snapshot_debug *header = snapshot;
+ unsigned int i;
unsigned int *data = snapshot + sizeof(*header);
- int i;
+ unsigned int shader_read_len = SHADER_MEMORY_SIZE;
+
+ if (SHADER_MEMORY_SIZE > (device->shader_mem_len >> 2))
+ shader_read_len = (device->shader_mem_len >> 2);
if (remain < DEBUG_SECTION_SZ(SHADER_MEMORY_SIZE)) {
SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
@@ -36,8 +66,22 @@
header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
header->size = SHADER_MEMORY_SIZE;
- for (i = 0; i < SHADER_MEMORY_SIZE; i++)
- adreno_regread(device, 0x4000 + i, &data[i]);
+ /* Map shader memory to kernel, for dumping */
+ if (device->shader_mem_virt == NULL)
+ device->shader_mem_virt = devm_ioremap(device->dev,
+ device->shader_mem_phys,
+ device->shader_mem_len);
+
+ if (device->shader_mem_virt == NULL) {
+ KGSL_DRV_ERR(device,
+ "Unable to map shader memory region\n");
+ return 0;
+ }
+
+ /* Now, dump shader memory to snapshot */
+ for (i = 0; i < shader_read_len; i++)
+ adreno_shadermem_regread(device, i, &data[i]);
+
return DEBUG_SECTION_SZ(SHADER_MEMORY_SIZE);
}
@@ -170,7 +214,8 @@
int i, size;
/* The size of the ROQ buffer is core dependent */
- size = adreno_is_a330(adreno_dev) ?
+ size = (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)) ?
A330_CP_ROQ_SIZE : A320_CP_ROQ_SIZE;
if (remain < DEBUG_SECTION_SZ(size)) {
@@ -220,66 +265,77 @@
return DEBUG_SECTION_SZ(size);
}
-#define DEBUGFS_BLOCK_SIZE 0x40
+struct debugbus_block {
+ unsigned int block_id;
+ unsigned int dwords;
+};
static int a3xx_snapshot_debugbus_block(struct kgsl_device *device,
void *snapshot, int remain, void *priv)
{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
struct kgsl_snapshot_debugbus *header = snapshot;
- unsigned int id = (unsigned int) priv;
- unsigned int val;
+ struct debugbus_block *block = priv;
int i;
unsigned int *data = snapshot + sizeof(*header);
- int size =
- (DEBUGFS_BLOCK_SIZE * sizeof(unsigned int)) + sizeof(*header);
+ unsigned int dwords;
+ int size;
+
+ /*
+	 * For A305 and A320 all debug bus regions are the same size (0x40).
+	 * For A330 and A305B they can be different sizes - most are still
+	 * 0x40, but some, like CP, are larger
+ */
+
+ dwords = (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)) ?
+ block->dwords : 0x40;
+
+ size = (dwords * sizeof(unsigned int)) + sizeof(*header);
if (remain < size) {
SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
return 0;
}
- val = (id << 8) | (1 << 16);
+ header->id = block->block_id;
+ header->count = dwords;
- header->id = id;
- header->count = DEBUGFS_BLOCK_SIZE;
-
- for (i = 0; i < DEBUGFS_BLOCK_SIZE; i++) {
- adreno_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, val | i);
- adreno_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS,
- &data[i]);
- }
+ for (i = 0; i < dwords; i++)
+ _rbbm_debug_bus_read(device, block->block_id, i, &data[i]);
return size;
}
-static unsigned int debugbus_blocks[] = {
- RBBM_BLOCK_ID_CP,
- RBBM_BLOCK_ID_RBBM,
- RBBM_BLOCK_ID_VBIF,
- RBBM_BLOCK_ID_HLSQ,
- RBBM_BLOCK_ID_UCHE,
- RBBM_BLOCK_ID_PC,
- RBBM_BLOCK_ID_VFD,
- RBBM_BLOCK_ID_VPC,
- RBBM_BLOCK_ID_TSE,
- RBBM_BLOCK_ID_RAS,
- RBBM_BLOCK_ID_VSC,
- RBBM_BLOCK_ID_SP_0,
- RBBM_BLOCK_ID_SP_1,
- RBBM_BLOCK_ID_SP_2,
- RBBM_BLOCK_ID_SP_3,
- RBBM_BLOCK_ID_TPL1_0,
- RBBM_BLOCK_ID_TPL1_1,
- RBBM_BLOCK_ID_TPL1_2,
- RBBM_BLOCK_ID_TPL1_3,
- RBBM_BLOCK_ID_RB_0,
- RBBM_BLOCK_ID_RB_1,
- RBBM_BLOCK_ID_RB_2,
- RBBM_BLOCK_ID_RB_3,
- RBBM_BLOCK_ID_MARB_0,
- RBBM_BLOCK_ID_MARB_1,
- RBBM_BLOCK_ID_MARB_2,
- RBBM_BLOCK_ID_MARB_3,
+static struct debugbus_block debugbus_blocks[] = {
+ { RBBM_BLOCK_ID_CP, 0x52, },
+ { RBBM_BLOCK_ID_RBBM, 0x40, },
+ { RBBM_BLOCK_ID_VBIF, 0x40, },
+ { RBBM_BLOCK_ID_HLSQ, 0x40, },
+ { RBBM_BLOCK_ID_UCHE, 0x40, },
+ { RBBM_BLOCK_ID_PC, 0x40, },
+ { RBBM_BLOCK_ID_VFD, 0x40, },
+ { RBBM_BLOCK_ID_VPC, 0x40, },
+ { RBBM_BLOCK_ID_TSE, 0x40, },
+ { RBBM_BLOCK_ID_RAS, 0x40, },
+ { RBBM_BLOCK_ID_VSC, 0x40, },
+ { RBBM_BLOCK_ID_SP_0, 0x40, },
+ { RBBM_BLOCK_ID_SP_1, 0x40, },
+ { RBBM_BLOCK_ID_SP_2, 0x40, },
+ { RBBM_BLOCK_ID_SP_3, 0x40, },
+ { RBBM_BLOCK_ID_TPL1_0, 0x40, },
+ { RBBM_BLOCK_ID_TPL1_1, 0x40, },
+ { RBBM_BLOCK_ID_TPL1_2, 0x40, },
+ { RBBM_BLOCK_ID_TPL1_3, 0x40, },
+ { RBBM_BLOCK_ID_RB_0, 0x40, },
+ { RBBM_BLOCK_ID_RB_1, 0x40, },
+ { RBBM_BLOCK_ID_RB_2, 0x40, },
+ { RBBM_BLOCK_ID_RB_3, 0x40, },
+ { RBBM_BLOCK_ID_MARB_0, 0x40, },
+ { RBBM_BLOCK_ID_MARB_1, 0x40, },
+ { RBBM_BLOCK_ID_MARB_2, 0x40, },
+ { RBBM_BLOCK_ID_MARB_3, 0x40, },
};
static void *a3xx_snapshot_debugbus(struct kgsl_device *device,
@@ -291,7 +347,7 @@
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot, remain,
a3xx_snapshot_debugbus_block,
- (void *) debugbus_blocks[i]);
+ (void *) &debugbus_blocks[i]);
}
return snapshot;
@@ -309,18 +365,58 @@
struct kgsl_snapshot_registers_list *list,
struct adreno_device *adreno_dev)
{
- /* HLSQ specific registers */
+ struct kgsl_device *device = &adreno_dev->dev;
+
/*
- * Don't dump any a3xx HLSQ registers just yet. Reading the HLSQ
- * registers can cause the device to hang if the HLSQ block is
- * busy. Add specific checks for each a3xx core as the requirements
- * are discovered. Disable by default for now.
+ * Trying to read HLSQ registers when the HLSQ block is busy
+ * will cause the device to hang. The RBBM_DEBUG_BUS has information
+ * that will tell us if the HLSQ block is busy or not. Read values
+ * from the debug bus to ensure the HLSQ block is not busy (this
+ * is hardware dependent). If the HLSQ block is busy do not
+ * dump the registers, otherwise dump the HLSQ registers.
*/
- if (!adreno_is_a3xx(adreno_dev)) {
- regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers;
- regs[list->count].count = a3xx_hlsq_registers_count;
- list->count++;
+
+ if (adreno_is_a330(adreno_dev)) {
+ /*
+ * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27]
+ *
+ * if (!stall_context_full)
+ * then dump HLSQ registers
+ */
+ unsigned int stall_context_full = 0;
+
+ _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49,
+ &stall_context_full);
+ stall_context_full &= 0x08000000;
+
+ if (stall_context_full)
+ return;
+ } else {
+ /*
+ * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0]
+ * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0]
+ *
+	 * if tpif is 0, 1 or 28 and spif is 0, 1 or 10,
+ * then dump HLSQ registers
+ */
+ unsigned int next_pif = 0;
+
+ /* check tpif */
+ _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif);
+ next_pif &= 0x1f;
+ if (next_pif != 0 && next_pif != 1 && next_pif != 28)
+ return;
+
+ /* check spif */
+ _rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif);
+ next_pif &= 0x3f;
+ if (next_pif != 0 && next_pif != 1 && next_pif != 10)
+ return;
}
+
+ regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers;
+ regs[list->count].count = a3xx_hlsq_registers_count;
+ list->count++;
}
static void _snapshot_a330_regs(struct kgsl_snapshot_registers *regs,
@@ -342,6 +438,7 @@
struct kgsl_device *device = &adreno_dev->dev;
struct kgsl_snapshot_registers_list list;
struct kgsl_snapshot_registers regs[5];
+ int size;
list.registers = regs;
list.count = 0;
@@ -352,7 +449,7 @@
/* Store relevant registers in list to snapshot */
_snapshot_a3xx_regs(regs, &list);
_snapshot_hlsq_regs(regs, &list, adreno_dev);
- if (adreno_is_a330(adreno_dev))
+ if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
_snapshot_a330_regs(regs, &list);
/* Master set of (non debug) registers */
@@ -360,10 +457,15 @@
KGSL_SNAPSHOT_SECTION_REGS, snapshot, remain,
kgsl_snapshot_dump_regs, &list);
- /* CP_STATE_DEBUG indexed registers */
+ /*
+	 * CP_STATE_DEBUG indexed registers - 20 (0x14) on A305/A320 and
+	 * 46 (0x2E) on A330/A305B
+ */
+ size = (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)) ? 0x2E : 0x14;
+
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_CP_STATE_DEBUG_INDEX,
- REG_CP_STATE_DEBUG_DATA, 0x0, 0x14);
+ REG_CP_STATE_DEBUG_DATA, 0x0, size);
/* CP_ME indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
@@ -404,7 +506,8 @@
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a3xx_snapshot_cp_roq, NULL);
- if (adreno_is_a330(adreno_dev)) {
+ if (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev)) {
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a330_snapshot_cp_merciu, NULL);
@@ -414,7 +517,7 @@
/* Enable Clock gating */
adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
- A3XX_RBBM_CLOCK_CTL_DEFAULT);
+ adreno_a3xx_rbbm_clock_ctl_default(adreno_dev));
return snapshot;
}
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index 890c8a1..ef599e9 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -64,11 +64,6 @@
adreno_dev->fast_hang_detect = 1;
debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs,
&adreno_dev->fast_hang_detect);
-
- /* Top level switch to enable/disable userspace FT control */
- adreno_dev->ft_user_control = 0;
- debugfs_create_u32("ft_user_control", 0644, device->d_debugfs,
- &adreno_dev->ft_user_control);
/*
* FT policy can be set to any of the options below.
* KGSL_FT_DISABLE -> BIT(0) Set to disable FT
@@ -80,7 +75,6 @@
adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
debugfs_create_u32("ft_policy", 0644, device->d_debugfs,
&adreno_dev->ft_policy);
-
/* By default enable long IB detection */
adreno_dev->long_ib_detect = 1;
debugfs_create_u32("long_ib_detect", 0644, device->d_debugfs,
@@ -96,7 +90,7 @@
* KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT -> BIT(3) Set to log only one
* pagefault per INT.
*/
- adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
- debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
- &adreno_dev->ft_pf_policy);
+ adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
+ debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
+ &adreno_dev->ft_pf_policy);
}
diff --git a/drivers/gpu/msm/adreno_debugfs.h b/drivers/gpu/msm/adreno_debugfs.h
deleted file mode 100644
index 1c97ebb..0000000
--- a/drivers/gpu/msm/adreno_debugfs.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* Copyright (c) 2002,2008-2012, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-#ifndef __ADRENO_DEBUGFS_H
-#define __ADRENO_DEBUGFS_H
-
-#ifdef CONFIG_DEBUG_FS
-
-int adreno_debugfs_init(struct kgsl_device *device);
-
-extern int adreno_pm_regs_enabled;
-extern int adreno_pm_ib_enabled;
-
-static inline int is_adreno_pm_regs_enabled(void)
-{
- return adreno_pm_regs_enabled;
-}
-
-static inline int is_adreno_pm_ib_enabled(void)
-{
- return adreno_pm_ib_enabled;
-}
-
-#else
-static inline int adreno_debugfs_init(struct kgsl_device *device)
-{
- return 0;
-}
-
-static inline int kgsl_pmregs_enabled(void)
-{
-
- return 1;
-}
-#endif
-
-#endif
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index 6fbcdee..176717d 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -12,6 +12,7 @@
*/
#include <linux/slab.h>
+#include <linux/msm_kgsl.h>
#include "kgsl.h"
#include "kgsl_sharedmem.h"
@@ -143,7 +144,7 @@
*/
int adreno_drawctxt_create(struct kgsl_device *device,
struct kgsl_pagetable *pagetable,
- struct kgsl_context *context, uint32_t flags)
+ struct kgsl_context *context, uint32_t *flags)
{
struct adreno_context *drawctxt;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -162,26 +163,36 @@
drawctxt->id = context->id;
rb->timestamp[context->id] = 0;
- if (flags & KGSL_CONTEXT_PREAMBLE)
+ *flags &= (KGSL_CONTEXT_PREAMBLE |
+ KGSL_CONTEXT_NO_GMEM_ALLOC |
+ KGSL_CONTEXT_PER_CONTEXT_TS |
+ KGSL_CONTEXT_USER_GENERATED_TS |
+ KGSL_CONTEXT_NO_FAULT_TOLERANCE |
+ KGSL_CONTEXT_TYPE_MASK);
+
+ if (*flags & KGSL_CONTEXT_PREAMBLE)
drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
- if (flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
+ if (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
- if (flags & KGSL_CONTEXT_PER_CONTEXT_TS)
+ if (*flags & KGSL_CONTEXT_PER_CONTEXT_TS)
drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
- if (flags & KGSL_CONTEXT_USER_GENERATED_TS) {
- if (!(flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
+ if (*flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+ if (!(*flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
ret = -EINVAL;
goto err;
}
drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
}
- if (flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+ if (*flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE;
+ drawctxt->type =
+ (*flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT;
+
ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
if (ret)
goto err;
@@ -242,10 +253,6 @@
if (device->state != KGSL_STATE_HUNG)
adreno_idle(device);
- if (adreno_is_a20x(adreno_dev) && adreno_dev->drawctxt_active)
- kgsl_setstate(&device->mmu, adreno_dev->drawctxt_active->id,
- KGSL_MMUFLAGS_PTUPDATE);
-
kgsl_sharedmem_free(&drawctxt->gpustate);
kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
@@ -306,8 +313,10 @@
return;
}
- KGSL_CTXT_INFO(device, "from %p to %p flags %d\n",
- adreno_dev->drawctxt_active, drawctxt, flags);
+ KGSL_CTXT_INFO(device, "from %d to %d flags %d\n",
+ adreno_dev->drawctxt_active ?
+ adreno_dev->drawctxt_active->id : 0,
+ drawctxt ? drawctxt->id : 0, flags);
/* Save the old context */
adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active);
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index fd60688..f0f3b6b 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -57,6 +57,14 @@
/* Context no fault tolerance */
#define CTXT_FLAGS_NO_FAULT_TOLERANCE BIT(16)
+/* Symbolic table for the adreno draw context type */
+#define ADRENO_DRAWCTXT_TYPES \
+ { KGSL_CONTEXT_TYPE_ANY, "any" }, \
+ { KGSL_CONTEXT_TYPE_GL, "GL" }, \
+ { KGSL_CONTEXT_TYPE_CL, "CL" }, \
+ { KGSL_CONTEXT_TYPE_C2D, "C2D" }, \
+ { KGSL_CONTEXT_TYPE_RS, "RS" }
+
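The `{ value, "name" }` shape of ADRENO_DRAWCTXT_TYPES matches what the
tracing core's __print_symbolic() expects, so a tracepoint can render the
numeric context type as text. A sketch of such a consumer (a fragment, not
the actual kgsl_trace.h contents):

/* Hypothetical TP_printk() in a kgsl tracepoint */
TP_printk("ctx=%u type=%s",
	__entry->drawctxt_id,
	__print_symbolic(__entry->type, ADRENO_DRAWCTXT_TYPES))
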
struct kgsl_device;
struct adreno_device;
struct kgsl_device_private;
@@ -93,6 +101,9 @@
unsigned int id;
unsigned int ib_gpu_time_used;
uint32_t flags;
+ uint32_t pagefault;
+ unsigned long pagefault_ts;
+ unsigned int type;
struct kgsl_pagetable *pagetable;
struct kgsl_memdesc gpustate;
unsigned int reg_restore[3];
@@ -125,7 +136,7 @@
int adreno_drawctxt_create(struct kgsl_device *device,
struct kgsl_pagetable *pagetable,
struct kgsl_context *context,
- uint32_t flags);
+ uint32_t *flags);
void adreno_drawctxt_destroy(struct kgsl_device *device,
struct kgsl_context *context);
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index a3fa312..f449870 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -143,10 +143,10 @@
#define CP_IM_STORE 0x2c
/* test 2 memory locations to dword values specified */
-#define CP_TEST_TWO_MEMS 0x71
+#define CP_TEST_TWO_MEMS 0x71
/* PFP waits until the FIFO between the PFP and the ME is empty */
-#define CP_WAIT_FOR_ME 0x13
+#define CP_WAIT_FOR_ME 0x13
/*
* for a20x
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index cf1cf90..5b52fd8 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -12,6 +12,7 @@
*/
#include <linux/vmalloc.h>
+#include <mach/board.h>
#include "kgsl.h"
#include "kgsl_sharedmem.h"
@@ -50,6 +51,7 @@
{CP_DRAW_INDX, "DRW_NDX_"},
{CP_DRAW_INDX_BIN, "DRW_NDXB"},
{CP_EVENT_WRITE, "EVENT_WT"},
+ {CP_MEM_WRITE, "MEM_WRIT"},
{CP_IM_LOAD, "IN__LOAD"},
{CP_IM_LOAD_IMMEDIATE, "IM_LOADI"},
{CP_IM_STORE, "IM_STORE"},
@@ -69,6 +71,14 @@
{CP_WAIT_FOR_IDLE, "WAIT4IDL"},
};
+static const struct pm_id_name pm3_nop_values[] = {
+ {KGSL_CONTEXT_TO_MEM_IDENTIFIER, "CTX_SWCH"},
+ {KGSL_CMD_IDENTIFIER, "CMD__EXT"},
+ {KGSL_CMD_INTERNAL_IDENTIFIER, "CMD__INT"},
+ {KGSL_START_OF_IB_IDENTIFIER, "IB_START"},
+ {KGSL_END_OF_IB_IDENTIFIER, "IB___END"},
+};
+
static uint32_t adreno_is_pm4_len(uint32_t word)
{
if (word == INVALID_RB_CMD)
@@ -128,6 +138,28 @@
return "????????";
}
+static bool adreno_is_pm3_nop_value(uint32_t word)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pm3_nop_values); ++i) {
+ if (word == pm3_nop_values[i].id)
+ return 1;
+ }
+ return 0;
+}
+
+static const char *adreno_pm3_nop_name(uint32_t word)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(pm3_nop_values); ++i) {
+ if (word == pm3_nop_values[i].id)
+ return pm3_nop_values[i].name;
+ }
+ return "????????";
+}
+
static void adreno_dump_regs(struct kgsl_device *device,
const int *registers, int size)
{
@@ -244,8 +276,13 @@
"%s", adreno_pm4_name(ptr4[j]));
*argp = -(adreno_is_pm4_len(ptr4[j])+1);
} else {
- lx += scnprintf(linebuf + lx, linebuflen - lx,
- "%8.8X", ptr4[j]);
+ if (adreno_is_pm3_nop_value(ptr4[j]))
+ lx += scnprintf(linebuf + lx, linebuflen - lx,
+ "%s", adreno_pm3_nop_name(ptr4[j]));
+ else
+ lx += scnprintf(linebuf + lx, linebuflen - lx,
+ "%8.8X", ptr4[j]);
+
if (*argp > 1)
--*argp;
else if (*argp == 1) {
@@ -665,7 +702,7 @@
" %08X\n", r1, r2, r3);
KGSL_LOG_DUMP(device, "PAGETABLE SIZE: %08X ",
- kgsl_mmu_get_ptsize());
+ kgsl_mmu_get_ptsize(&device->mmu));
kgsl_regread(device, MH_MMU_TRAN_ERROR, &r1);
KGSL_LOG_DUMP(device, " TRAN_ERROR = %08X\n", r1);
@@ -703,6 +740,7 @@
mb();
if (device->pm_dump_enable) {
+
if (adreno_is_a2xx(adreno_dev))
adreno_dump_a2xx(device);
else if (adreno_is_a3xx(adreno_dev))
@@ -728,7 +766,7 @@
if (!device->pm_dump_enable) {
KGSL_LOG_DUMP(device,
- "RBBM STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
+ "STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
" | RPTR: %04X | WPTR: %04X\n",
rbbm_status, cp_ib1_base, cp_ib1_bufsz, cp_ib2_base,
cp_ib2_bufsz, cp_rb_rptr, cp_rb_wptr);
@@ -890,7 +928,8 @@
adreno_dump_regs(device, a3xx_registers,
a3xx_registers_count);
- if (adreno_is_a330(adreno_dev))
+ if (adreno_is_a330(adreno_dev) ||
+ adreno_is_a305b(adreno_dev))
adreno_dump_regs(device, a330_registers,
a330_registers_count);
}
diff --git a/drivers/gpu/msm/adreno_postmortem.h b/drivers/gpu/msm/adreno_postmortem.h
deleted file mode 100644
index 7706037..0000000
--- a/drivers/gpu/msm/adreno_postmortem.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- */
-
-#ifndef __ADRENO_POSTMORTEM_H
-#define __ADRENO_POSTMORTEM_H
-
-struct kgsl_device;
-
-int adreno_postmortem_dump(struct kgsl_device *device, int manual);
-
-#endif
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 179027c..a4bb4fa 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -18,6 +18,7 @@
#include "kgsl.h"
#include "kgsl_sharedmem.h"
#include "kgsl_cffdump.h"
+#include "kgsl_trace.h"
#include "adreno.h"
#include "adreno_pm4types.h"
@@ -319,7 +320,7 @@
return 0;
}
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram)
+int adreno_ringbuffer_start(struct adreno_ringbuffer *rb)
{
int status;
/*cp_rb_cntl_u cp_rb_cntl; */
@@ -331,9 +332,6 @@
if (rb->flags & KGSL_FLAGS_STARTED)
return 0;
- if (init_ram)
- rb->timestamp[KGSL_MEMSTORE_GLOBAL] = 0;
-
kgsl_sharedmem_set(&rb->memptrs_desc, 0, 0,
sizeof(struct kgsl_rbmemptrs));
@@ -433,8 +431,11 @@
return status;
/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
- if (adreno_is_a305(adreno_dev) || adreno_is_a320(adreno_dev))
+ if (adreno_is_a305(adreno_dev) || adreno_is_a305c(adreno_dev) ||
+ adreno_is_a320(adreno_dev))
adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x000E0602);
+ else if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
+ adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x003E2008);
rb->rptr = 0;
rb->wptr = 0;
@@ -443,7 +444,9 @@
adreno_regwrite(device, REG_CP_ME_CNTL, 0);
/* ME init is GPU specific, so jump into the sub-function */
- adreno_dev->gpudev->rb_init(adreno_dev, rb);
+ status = adreno_dev->gpudev->rb_init(adreno_dev, rb);
+ if (status)
+ return status;
/* idle device to validate ME INIT */
status = adreno_idle(device);
@@ -481,6 +484,7 @@
*/
rb->sizedwords = KGSL_RB_SIZE >> 2;
+ rb->buffer_desc.flags = KGSL_MEMFLAGS_GPUREADONLY;
/* allocate memory for ringbuffer */
status = kgsl_allocate_contiguous(&rb->buffer_desc,
(rb->sizedwords << 2));
@@ -564,6 +568,8 @@
total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
/* 2 dwords to store the start of command sequence */
total_sizedwords += 2;
+ /* internal ib command identifier for the ringbuffer */
+ total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
/* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
total_sizedwords += context ? 13 : 0;
@@ -571,17 +577,25 @@
if (adreno_is_a3xx(adreno_dev))
total_sizedwords += 7;
+ if (adreno_is_a2xx(adreno_dev))
+ total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */
+
total_sizedwords += 2; /* scratchpad ts for fault tolerance */
+ total_sizedwords += 3; /* sop timestamp */
+ total_sizedwords += 4; /* eop timestamp */
+
if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
- total_sizedwords += 3; /* sop timestamp */
- total_sizedwords += 4; /* eop timestamp */
total_sizedwords += 3; /* global timestamp without cache
* flush for non-zero context */
- } else {
- total_sizedwords += 4; /* global timestamp for fault tolerance*/
}
+ if (adreno_is_a20x(adreno_dev))
+ total_sizedwords += 2; /* CACHE_FLUSH */
+
+ if (flags & KGSL_CMD_FLAGS_EOF)
+ total_sizedwords += 2;
+
ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
if (!ringcmds) {
/*
@@ -597,23 +611,9 @@
GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
- if (flags & KGSL_CMD_FLAGS_PMODE) {
- /* disable protected mode error checking */
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
- }
-
- for (i = 0; i < sizedwords; i++) {
- GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
- cmds++;
- }
-
- if (flags & KGSL_CMD_FLAGS_PMODE) {
- /* re-enable protected mode error checking */
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
+ if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_INTERNAL_IDENTIFIER);
}
/* always increment the global timestamp. once. */
@@ -635,10 +635,45 @@
GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
+ /* start-of-pipeline timestamp */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_MEM_WRITE, 2));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+ KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
+ if (flags & KGSL_CMD_FLAGS_PMODE) {
+ /* disable protected mode error checking */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
+ }
+
+ for (i = 0; i < sizedwords; i++) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, *cmds);
+ cmds++;
+ }
+
+ if (flags & KGSL_CMD_FLAGS_PMODE) {
+ /* re-enable protected mode error checking */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
+ }
+
+	/* HW workaround for an MMU page fault
+	 * caused by memory being freed before
+	 * the GPU has finished accessing it.
+	 */
+ if (adreno_is_a2xx(adreno_dev)) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
+ }
+
if (adreno_is_a3xx(adreno_dev)) {
/*
- * FLush HLSQ lazy updates to make sure there are no
- * rsources pending for indirect loads after the timestamp
+ * Flush HLSQ lazy updates to make sure there are no
+ * resources pending for indirect loads after the timestamp
*/
GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -649,22 +684,19 @@
GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
}
+ /*
+ * end-of-pipeline timestamp. If per context timestamps is not
+ * enabled, then context_id will be KGSL_MEMSTORE_GLOBAL so all
+ * eop timestamps will work out.
+ */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type3_packet(CP_EVENT_WRITE, 3));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
+ KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
+
if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS
&& !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
- /* start-of-pipeline timestamp */
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- cp_type3_packet(CP_MEM_WRITE, 2));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
- /* end-of-pipeline timestamp */
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- cp_type3_packet(CP_EVENT_WRITE, 3));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
- GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_MEM_WRITE, 2));
@@ -673,16 +705,14 @@
eoptimestamp)));
GSL_RB_WRITE(ringcmds, rcmd_gpu,
rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
- } else {
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- cp_type3_packet(CP_EVENT_WRITE, 3));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
- GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- eoptimestamp)));
- GSL_RB_WRITE(ringcmds, rcmd_gpu,
- rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
}
+
+ if (adreno_is_a20x(adreno_dev)) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_EVENT_WRITE, 1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH);
+ }
+
if (context) {
/* Conditional execution based on memory values */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -960,43 +990,31 @@
{
struct kgsl_device *device = dev_priv->device;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- unsigned int *link;
+	unsigned int *link = NULL;
unsigned int *cmds;
unsigned int i;
- struct adreno_context *drawctxt;
+ struct adreno_context *drawctxt = NULL;
unsigned int start_index = 0;
+ int ret;
- if (device->state & KGSL_STATE_HUNG)
- return -EBUSY;
+ if (device->state & KGSL_STATE_HUNG) {
+ ret = -EBUSY;
+ goto done;
+ }
+
if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED) ||
- context == NULL || ibdesc == 0 || numibs == 0)
- return -EINVAL;
-
+ context == NULL || ibdesc == 0 || numibs == 0) {
+ ret = -EINVAL;
+ goto done;
+ }
drawctxt = context->devctxt;
if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
KGSL_CTXT_ERR(device, "proc %s failed fault tolerance"
" will not accept commands for context %d\n",
drawctxt->pid_name, drawctxt->id);
- return -EDEADLK;
- }
-
- if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
- KGSL_CTXT_ERR(device,
- "proc %s triggered fault tolerance"
- " skipping commands for context till EOF %d\n",
- drawctxt->pid_name, drawctxt->id);
- if (flags & KGSL_CMD_FLAGS_EOF)
- drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
- numibs = 0;
- }
-
- cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
- GFP_KERNEL);
- if (!link) {
- KGSL_CORE_ERR("kzalloc(%d) failed\n",
- sizeof(unsigned int) * (numibs * 3 + 4));
- return -ENOMEM;
+ ret = -EDEADLK;
+ goto done;
}
/*When preamble is enabled, the preamble buffer with state restoration
@@ -1007,6 +1025,26 @@
adreno_dev->drawctxt_active == drawctxt)
start_index = 1;
+ if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+ KGSL_CTXT_ERR(device,
+ "proc %s triggered fault tolerance"
+ " skipping commands for context till EOF %d\n",
+ drawctxt->pid_name, drawctxt->id);
+ if (flags & KGSL_CMD_FLAGS_EOF)
+ drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+ if (start_index)
+ numibs = 1;
+ else
+ numibs = 0;
+ }
+
+ cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
+ GFP_KERNEL);
+ if (!link) {
+ ret = -ENOMEM;
+ goto done;
+ }
+
if (!start_index) {
*cmds++ = cp_nop_packet(1);
*cmds++ = KGSL_START_OF_IB_IDENTIFIER;
@@ -1021,9 +1059,15 @@
if (unlikely(adreno_dev->ib_check_level >= 1 &&
!_parse_ibs(dev_priv, ibdesc[i].gpuaddr,
ibdesc[i].sizedwords))) {
- kfree(link);
- return -EINVAL;
+ ret = -EINVAL;
+ goto done;
}
+
+ if (ibdesc[i].sizedwords == 0) {
+ ret = -EINVAL;
+ goto done;
+ }
+
*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
*cmds++ = ibdesc[i].gpuaddr;
*cmds++ = ibdesc[i].sizedwords;
@@ -1043,11 +1087,6 @@
(flags & KGSL_CMD_FLAGS_EOF),
&link[0], (cmds - link), *timestamp);
- KGSL_CMD_INFO(device, "ctxt %d g %08x numibs %d ts %d\n",
- context->id, (unsigned int)ibdesc, numibs, *timestamp);
-
- kfree(link);
-
#ifdef CONFIG_MSM_KGSL_CFF_DUMP
/*
* insert wait for idle after every IB1
@@ -1063,9 +1102,16 @@
*/
if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) {
drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT;
- return -EPROTO;
+ ret = -EPROTO;
} else
- return 0;
+ ret = 0;
+
+done:
+	trace_kgsl_issueibcmds(device, context ? context->id : 0, ibdesc,
+		numibs, *timestamp, flags, ret,
+		drawctxt ? drawctxt->type : 0);
+
+ kfree(link);
+ return ret;
}
static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index fa03c05..e563ec7 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -97,8 +97,7 @@
int adreno_ringbuffer_init(struct kgsl_device *device);
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb,
- unsigned int init_ram);
+int adreno_ringbuffer_start(struct adreno_ringbuffer *rb);
void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb);
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index f23586e..a76ed87 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -162,6 +162,12 @@
static unsigned int sp_fs_pvt_mem_addr;
/*
+ * Cached value of SP_VS_OBJ_START_REG and SP_FS_OBJ_START_REG.
+ */
+static unsigned int sp_vs_obj_start_reg;
+static unsigned int sp_fs_obj_start_reg;
+
+/*
* Each load state block has two possible types. Each type has a different
* number of dwords per unit. Use this handy lookup table to make sure
* we dump the right amount of data from the indirect buffer
@@ -373,6 +379,26 @@
sp_fs_pvt_mem_addr = 0;
}
+ if (sp_vs_obj_start_reg) {
+ ret = kgsl_snapshot_get_object(device, ptbase,
+ sp_vs_obj_start_reg & 0xFFFFFFE0, 0,
+ SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+ snapshot_frozen_objsize += ret;
+ sp_vs_obj_start_reg = 0;
+ }
+
+ if (sp_fs_obj_start_reg) {
+ ret = kgsl_snapshot_get_object(device, ptbase,
+ sp_fs_obj_start_reg & 0xFFFFFFE0, 0,
+ SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+ snapshot_frozen_objsize += ret;
+ sp_fs_obj_start_reg = 0;
+ }
+
/* Finally: VBOs */
/* The number of active VBOs is stored in VFD_CONTROL_O[31:27] */
@@ -444,7 +470,7 @@
int offset = type0_pkt_offset(*ptr);
int i;
- for (i = 0; i < size; i++, offset++) {
+ for (i = 0; i < size - 1; i++, offset++) {
/* Visiblity stream buffer */
@@ -505,11 +531,20 @@
case A3XX_SP_FS_PVT_MEM_ADDR_REG:
sp_fs_pvt_mem_addr = ptr[i + 1];
break;
+ case A3XX_SP_VS_OBJ_START_REG:
+ sp_vs_obj_start_reg = ptr[i + 1];
+ break;
+ case A3XX_SP_FS_OBJ_START_REG:
+ sp_fs_obj_start_reg = ptr[i + 1];
+ break;
}
}
}
}
+static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase,
+ unsigned int gpuaddr, unsigned int dwords);
+
/* Add an IB as a GPU object, but first, parse it to find more goodies within */
static int ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
@@ -549,32 +584,12 @@
if (adreno_cmd_is_ib(src[i])) {
unsigned int gpuaddr = src[i + 1];
unsigned int size = src[i + 2];
- unsigned int ibbase;
- /* Address of the last processed IB2 */
- kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
+ ret = parse_ib(device, ptbase, gpuaddr, size);
- /*
- * If this is the last IB2 that was executed,
- * then push it to make sure it goes into the
- * static space
- */
-
- if (ibbase == gpuaddr)
- push_object(device,
- SNAPSHOT_OBJ_TYPE_IB, ptbase,
- gpuaddr, size);
- else {
- ret = ib_add_gpu_object(device,
- ptbase, gpuaddr, size);
-
- /*
- * If adding the IB failed then stop
- * parsing
- */
- if (ret < 0)
- goto done;
- }
+ /* If adding the IB failed then stop parsing */
+ if (ret < 0)
+ goto done;
} else {
ret = ib_parse_type3(device, &src[i], ptbase);
/*
@@ -604,29 +619,34 @@
return ret;
}
-/* Snapshot the istore memory */
-static int snapshot_istore(struct kgsl_device *device, void *snapshot,
- int remain, void *priv)
+/*
+ * We want to store the last executed IB1 and IB2 in the static region to ensure
+ * that we get at least some information out of the snapshot even if we can't
+ * access the dynamic data from the sysfs file. Push all other IBs onto the
+ * dynamic list.
+ */
+static inline int parse_ib(struct kgsl_device *device, unsigned int ptbase,
+ unsigned int gpuaddr, unsigned int dwords)
{
- struct kgsl_snapshot_istore *header = snapshot;
- unsigned int *data = snapshot + sizeof(*header);
- struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- int count, i;
+ unsigned int ib1base, ib2base;
+ int ret = 0;
- count = adreno_dev->istore_size * adreno_dev->instruction_size;
+ /*
+ * Check the IB address - if it is either the last executed IB1 or the
+ * last executed IB2 then push it into the static blob otherwise put
+ * it in the dynamic list
+ */
- if (remain < (count * 4) + sizeof(*header)) {
- KGSL_DRV_ERR(device,
- "snapshot: Not enough memory for the istore section");
- return 0;
- }
+ kgsl_regread(device, REG_CP_IB1_BASE, &ib1base);
+ kgsl_regread(device, REG_CP_IB2_BASE, &ib2base);
- header->count = adreno_dev->istore_size;
+ if (gpuaddr == ib1base || gpuaddr == ib2base)
+ push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
+ gpuaddr, dwords);
+ else
+ ret = ib_add_gpu_object(device, ptbase, gpuaddr, dwords);
- for (i = 0; i < count; i++)
- kgsl_regread(device, ADRENO_ISTORE_START + i, &data[i]);
-
- return (count * 4) + sizeof(*header);
+ return ret;
}
/* Snapshot the ringbuffer memory */
@@ -779,12 +799,11 @@
* others get marked at GPU objects
*/
- if (ibaddr == ibbase || memdesc != NULL)
+ if (memdesc != NULL)
push_object(device, SNAPSHOT_OBJ_TYPE_IB,
ptbase, ibaddr, ibsize);
else
- ib_add_gpu_object(device, ptbase, ibaddr,
- ibsize);
+ parse_ib(device, ptbase, ibaddr, ibsize);
}
index = index + 1;
@@ -799,6 +818,64 @@
return size + sizeof(*header);
}
+static int snapshot_capture_mem_list(struct kgsl_device *device, void *snapshot,
+ int remain, void *priv)
+{
+ struct kgsl_snapshot_replay_mem_list *header = snapshot;
+	struct kgsl_process_private *private = NULL, *tmp;
+ unsigned int ptbase;
+ struct rb_node *node;
+ struct kgsl_mem_entry *entry = NULL;
+ int num_mem;
+ unsigned int *data = snapshot + sizeof(*header);
+
+ ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
+ mutex_lock(&kgsl_driver.process_mutex);
+	list_for_each_entry(tmp, &kgsl_driver.process_list, list) {
+		if (kgsl_mmu_pt_equal(&device->mmu, tmp->pagetable,
+			ptbase)) {
+			private = tmp;
+			break;
+		}
+	}
+ mutex_unlock(&kgsl_driver.process_mutex);
+ if (!private) {
+ KGSL_DRV_ERR(device,
+ "Failed to get pointer to process private structure\n");
+ return 0;
+ }
+ /* We need to know the number of memory objects that the process has */
+ spin_lock(&private->mem_lock);
+ for (node = rb_first(&private->mem_rb), num_mem = 0; node; ) {
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+ node = rb_next(&entry->node);
+ num_mem++;
+ }
+
+ if (remain < ((num_mem * 3 * sizeof(unsigned int)) +
+ sizeof(*header))) {
+ KGSL_DRV_ERR(device,
+ "snapshot: Not enough memory for the mem list section");
+ spin_unlock(&private->mem_lock);
+ return 0;
+ }
+ header->num_entries = num_mem;
+ header->ptbase = ptbase;
+ /*
+	 * Walk through the memory list and store the
+	 * (gpuaddr, size, memtype) tuples in the snapshot
+ */
+ for (node = rb_first(&private->mem_rb); node; ) {
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+ node = rb_next(&entry->node);
+
+ *data++ = entry->memdesc.gpuaddr;
+ *data++ = entry->memdesc.size;
+ *data++ = (entry->memdesc.priv & KGSL_MEMTYPE_MASK) >>
+ KGSL_MEMTYPE_SHIFT;
+ }
+ spin_unlock(&private->mem_lock);
+ return sizeof(*header) + (num_mem * 3 * sizeof(unsigned int));
+}
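
The section body laid out above is simply the two-field header followed by
num_entries packed (gpuaddr, size, memtype) triplets. A hedged sketch of how
a post-processing tool might walk it (the struct here is a stand-in for
kgsl_snapshot_replay_mem_list; the real definition lives in the kgsl snapshot
headers):

#include <stdint.h>
#include <stdio.h>

struct mem_list_header {	/* stand-in layout */
	uint32_t num_entries;
	uint32_t ptbase;
};

static void dump_mem_list(const void *section)
{
	const struct mem_list_header *h = section;
	const uint32_t *data = (const uint32_t *)(h + 1);
	uint32_t i;

	for (i = 0; i < h->num_entries; i++, data += 3)
		printf("gpuaddr=0x%08x size=%u memtype=%u\n",
		       data[0], data[1], data[2]);
}
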
+
/* Snapshot the memory for an indirect buffer */
static int snapshot_ib(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
@@ -829,15 +906,14 @@
continue;
if (adreno_cmd_is_ib(*src))
- push_object(device, SNAPSHOT_OBJ_TYPE_IB,
- obj->ptbase, src[1], src[2]);
- else {
+ ret = parse_ib(device, obj->ptbase, src[1],
+ src[2]);
+ else
ret = ib_parse_type3(device, src, obj->ptbase);
- /* Stop parsing if the type3 decode fails */
- if (ret < 0)
- break;
- }
+ /* Stop parsing if the type3 decode fails */
+ if (ret < 0)
+ break;
}
}
@@ -903,6 +979,13 @@
snapshot, remain, snapshot_rb, NULL);
/*
+ * Add a section that lists (gpuaddr, size, memtype) tuples of the
+ * hanging process
+ */
+ snapshot = kgsl_snapshot_add_section(device,
+ KGSL_SNAPSHOT_SECTION_MEMLIST, snapshot, remain,
+ snapshot_capture_mem_list, NULL);
+ /*
* Make sure that the last IB1 that was being executed is dumped.
* Since this was the last IB1 that was processed, we should have
* already added it to the list during the ringbuffer parse but we
@@ -950,17 +1033,6 @@
for (i = 0; i < objbufptr; i++)
snapshot = dump_object(device, i, snapshot, remain);
- /*
- * Only dump the istore on a hang - reading it on a running system
- * has a non 0 chance of hanging the GPU
- */
-
- if (hang) {
- snapshot = kgsl_snapshot_add_section(device,
- KGSL_SNAPSHOT_SECTION_ISTORE, snapshot, remain,
- snapshot_istore, NULL);
- }
-
/* Add GPU specific sections - registers mainly, but other stuff too */
if (adreno_dev->gpudev->snapshot)
snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 3582a41..53ef392 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,7 +18,7 @@
#include <linux/uaccess.h>
#include <linux/interrupt.h>
#include <linux/workqueue.h>
-#include <linux/android_pmem.h>
+
#include <linux/vmalloc.h>
#include <linux/pm_runtime.h>
#include <linux/genlock.h>
@@ -28,6 +28,7 @@
#include <linux/msm_ion.h>
#include <linux/io.h>
#include <mach/socinfo.h>
+#include <linux/mman.h>
#include "kgsl.h"
#include "kgsl_debugfs.h"
@@ -52,6 +53,52 @@
static struct ion_client *kgsl_ion_client;
+int kgsl_memfree_hist_init(void)
+{
+ void *base;
+
+ base = kzalloc(KGSL_MEMFREE_HIST_SIZE, GFP_KERNEL);
+ kgsl_driver.memfree_hist.base_hist_rb = base;
+ if (base == NULL)
+ return -ENOMEM;
+ kgsl_driver.memfree_hist.size = KGSL_MEMFREE_HIST_SIZE;
+ kgsl_driver.memfree_hist.wptr = base;
+ return 0;
+}
+
+void kgsl_memfree_hist_exit(void)
+{
+ kfree(kgsl_driver.memfree_hist.base_hist_rb);
+ kgsl_driver.memfree_hist.base_hist_rb = NULL;
+}
+
+void kgsl_memfree_hist_set_event(unsigned int pid, unsigned int gpuaddr,
+ unsigned int size, int flags)
+{
+ struct kgsl_memfree_hist_elem *p;
+
+ void *base = kgsl_driver.memfree_hist.base_hist_rb;
+ int rbsize = kgsl_driver.memfree_hist.size;
+
+ if (base == NULL)
+ return;
+
+ mutex_lock(&kgsl_driver.memfree_hist_mutex);
+ p = kgsl_driver.memfree_hist.wptr;
+ p->pid = pid;
+ p->gpuaddr = gpuaddr;
+ p->size = size;
+ p->flags = flags;
+
+ kgsl_driver.memfree_hist.wptr++;
+ if ((void *)kgsl_driver.memfree_hist.wptr >= base+rbsize) {
+ kgsl_driver.memfree_hist.wptr =
+ (struct kgsl_memfree_hist_elem *)base;
+ }
+ mutex_unlock(&kgsl_driver.memfree_hist_mutex);
+}
+
+
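/*
 * A minimal model of the wrap logic above (illustrative, not in the
 * patch). With the definitions added to kgsl.h later in this diff, the
 * ring holds KGSL_MEMFREE_HIST_SIZE / sizeof(struct kgsl_memfree_hist_elem)
 * records (8192 / 16 == 512 with 4K pages) before wptr wraps and the
 * oldest free record is overwritten.
 */
static struct kgsl_memfree_hist_elem *
hist_advance(void *base, unsigned int rbsize,
	     struct kgsl_memfree_hist_elem *wptr)
{
	wptr++;
	if ((void *)wptr >= base + rbsize)	/* walked off the end: wrap */
		wptr = base;
	return wptr;
}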
/* kgsl_get_mem_entry - get the mem_entry structure for the specified object
* @device - Pointer to the device structure
* @ptbase - the pagetable base of the object
@@ -99,13 +146,6 @@
return entry;
}
-unsigned int kgsl_get_alloc_size(int detailed)
-{
- unsigned int ret = 0;
-
- return ret;
-}
-
void
kgsl_mem_entry_destroy(struct kref *kref)
{
@@ -143,9 +183,19 @@
}
EXPORT_SYMBOL(kgsl_mem_entry_destroy);
-static
-void kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry,
- struct kgsl_process_private *process)
+/**
+ * kgsl_mem_entry_track_gpuaddr - Insert a mem_entry in the address tree
+ * @process: the process that owns the memory
+ * @entry: the memory entry
+ *
+ * Insert a kgsl_mem_entry in to the rb_tree for searching by GPU address.
+ * Not all mem_entries will have gpu addresses when first created, so this
+ * function may be called after creation when the GPU address is finally
+ * assigned.
+ */
+static void
+kgsl_mem_entry_track_gpuaddr(struct kgsl_process_private *process,
+ struct kgsl_mem_entry *entry)
{
struct rb_node **node;
struct rb_node *parent = NULL;
@@ -170,8 +220,48 @@
rb_insert_color(&entry->node, &process->mem_rb);
spin_unlock(&process->mem_lock);
+}
+/**
+ * kgsl_mem_entry_attach_process - Attach a mem_entry to its owner process
+ * @entry: the memory entry
+ * @process: the owner process
+ *
+ * Attach a newly created mem_entry to its owner process so that
+ * it can be found later. The mem_entry will be added to mem_idr and have
+ * its 'id' field assigned. If the GPU address has been set, the entry
+ * will also be added to the mem_rb tree.
+ *
+ * @returns - 0 on success or error code on failure.
+ */
+static int
+kgsl_mem_entry_attach_process(struct kgsl_mem_entry *entry,
+ struct kgsl_process_private *process)
+{
+ int ret;
+
+ while (1) {
+ if (idr_pre_get(&process->mem_idr, GFP_KERNEL) == 0) {
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ spin_lock(&process->mem_lock);
+ ret = idr_get_new_above(&process->mem_idr, entry, 1,
+ &entry->id);
+ spin_unlock(&process->mem_lock);
+
+ if (ret == 0)
+ break;
+ else if (ret != -EAGAIN)
+ goto err;
+ }
entry->priv = process;
+
+ if (entry->memdesc.gpuaddr != 0)
+ kgsl_mem_entry_track_gpuaddr(process, entry);
+err:
+ return ret;
}
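/*
 * The loop above is the standard pre-3.9 two-step IDR pattern, restated
 * generically for clarity (illustrative only): preallocate outside the
 * lock, allocate inside it, and retry when a racing allocation consumed
 * the preallocated layer.
 */
static int idr_alloc_retry(struct idr *idr, void *ptr, int start,
			   spinlock_t *lock, int *id)
{
	int ret;

	do {
		if (idr_pre_get(idr, GFP_KERNEL) == 0)
			return -ENOMEM;	/* no memory for a new idr layer */
		spin_lock(lock);
		ret = idr_get_new_above(idr, ptr, start, id);
		spin_unlock(lock);
	} while (ret == -EAGAIN);	/* preallocation raced away; retry */

	return ret;
}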
/* Detach a memory entry from a process and unmap it from the MMU */
@@ -181,6 +271,17 @@
if (entry == NULL)
return;
+ spin_lock(&entry->priv->mem_lock);
+
+ if (entry->id != 0)
+ idr_remove(&entry->priv->mem_idr, entry->id);
+ entry->id = 0;
+
+ if (entry->memdesc.gpuaddr != 0)
+ rb_erase(&entry->node, &entry->priv->mem_rb);
+
+ spin_unlock(&entry->priv->mem_lock);
+
entry->priv->stats[entry->memtype].cur -= entry->memdesc.size;
entry->priv = NULL;
@@ -199,14 +300,19 @@
context = kzalloc(sizeof(*context), GFP_KERNEL);
- if (context == NULL)
- return NULL;
+ if (context == NULL) {
+ KGSL_DRV_INFO(dev_priv->device, "kzalloc(%d) failed\n",
+ sizeof(*context));
+ return ERR_PTR(-ENOMEM);
+ }
while (1) {
if (idr_pre_get(&dev_priv->device->context_idr,
GFP_KERNEL) == 0) {
- kfree(context);
- return NULL;
+ KGSL_DRV_INFO(dev_priv->device,
+ "idr_pre_get: ENOMEM\n");
+ ret = -ENOMEM;
+ goto func_end;
}
ret = idr_get_new_above(&dev_priv->device->context_idr,
@@ -216,26 +322,25 @@
break;
}
- if (ret) {
- kfree(context);
- return NULL;
- }
+ if (ret)
+ goto func_end;
/* MAX - 1, there is one memdesc in memstore for device info */
if (id >= KGSL_MEMSTORE_MAX) {
- KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d "
+ KGSL_DRV_INFO(dev_priv->device, "cannot have more than %d "
"ctxts due to memstore limitation\n",
KGSL_MEMSTORE_MAX);
idr_remove(&dev_priv->device->context_idr, id);
- kfree(context);
- return NULL;
+ ret = -ENOSPC;
+ goto func_end;
}
kref_init(&context->refcount);
context->id = id;
context->dev_priv = dev_priv;
- if (kgsl_sync_timeline_create(context)) {
+ ret = kgsl_sync_timeline_create(context);
+ if (ret) {
idr_remove(&dev_priv->device->context_idr, id);
goto func_end;
}
@@ -257,7 +362,7 @@
func_end:
if (ret) {
kfree(context);
- return NULL;
+ return ERR_PTR(ret);
}
return context;
@@ -362,24 +467,6 @@
return ret;
}
-int kgsl_register_ts_notifier(struct kgsl_device *device,
- struct notifier_block *nb)
-{
- BUG_ON(device == NULL);
- return atomic_notifier_chain_register(&device->ts_notifier_list,
- nb);
-}
-EXPORT_SYMBOL(kgsl_register_ts_notifier);
-
-int kgsl_unregister_ts_notifier(struct kgsl_device *device,
- struct notifier_block *nb)
-{
- BUG_ON(device == NULL);
- return atomic_notifier_chain_unregister(&device->ts_notifier_list,
- nb);
-}
-EXPORT_SYMBOL(kgsl_unregister_ts_notifier);
-
int kgsl_check_timestamp(struct kgsl_device *device,
struct kgsl_context *context, unsigned int timestamp)
{
@@ -430,7 +517,7 @@
INIT_COMPLETION(device->hwaccess_gate);
device->ftbl->suspend_context(device);
device->ftbl->stop(device);
- pm_qos_update_request(&device->pm_qos_req_dma,
+ pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
PM_QOS_DEFAULT_VALUE);
kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
break;
@@ -579,12 +666,15 @@
private->pid = task_tgid_nr(current);
private->mem_rb = RB_ROOT;
+ idr_init(&private->mem_idr);
+
if (kgsl_mmu_enabled())
{
unsigned long pt_name;
+ struct kgsl_mmu *mmu = &cur_dev_priv->device->mmu;
pt_name = task_tgid_nr(current);
- private->pagetable = kgsl_mmu_getpagetable(pt_name);
+ private->pagetable = kgsl_mmu_getpagetable(mmu, pt_name);
if (private->pagetable == NULL) {
kfree(private);
private = NULL;
@@ -607,7 +697,7 @@
struct kgsl_process_private *private)
{
struct kgsl_mem_entry *entry = NULL;
- struct rb_node *node;
+ int next = 0;
if (!private)
return;
@@ -622,14 +712,22 @@
list_del(&private->list);
- for (node = rb_first(&private->mem_rb); node; ) {
- entry = rb_entry(node, struct kgsl_mem_entry, node);
- node = rb_next(&entry->node);
-
- rb_erase(&entry->node, &private->mem_rb);
+ while (1) {
+ rcu_read_lock();
+ entry = idr_get_next(&private->mem_idr, &next);
+ rcu_read_unlock();
+ if (entry == NULL)
+ break;
kgsl_mem_entry_detach_process(entry);
+ /*
+ * Always start back at the beginning, to
+ * ensure all entries are removed,
+ * like list_for_each_entry_safe.
+ */
+ next = 0;
}
kgsl_mmu_putpagetable(private->pagetable);
+ idr_destroy(&private->mem_idr);
kfree(private);
unlock:
mutex_unlock(&kgsl_driver.process_mutex);
@@ -717,13 +815,6 @@
dev_priv->device = device;
filep->private_data = dev_priv;
- /* Get file (per process) private struct */
- dev_priv->process_priv = kgsl_get_process_private(dev_priv);
- if (dev_priv->process_priv == NULL) {
- result = -ENOMEM;
- goto err_freedevpriv;
- }
-
mutex_lock(&device->mutex);
kgsl_check_suspended(device);
@@ -731,26 +822,45 @@
kgsl_sharedmem_set(&device->memstore, 0, 0,
device->memstore.size);
- result = device->ftbl->start(device, true);
+ result = device->ftbl->init(device);
+ if (result)
+ goto err_freedevpriv;
- if (result) {
- mutex_unlock(&device->mutex);
- goto err_putprocess;
- }
+ result = device->ftbl->start(device);
+ if (result)
+ goto err_freedevpriv;
+
kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
}
device->open_count++;
mutex_unlock(&device->mutex);
+ /*
+ * Get file (per process) private struct. This must be done
+ * after the first start so that the global pagetable mappings
+ * are set up before we create the per-process pagetable.
+ */
+ dev_priv->process_priv = kgsl_get_process_private(dev_priv);
+ if (dev_priv->process_priv == NULL) {
+ result = -ENOMEM;
+ goto err_stop;
+ }
+
KGSL_DRV_INFO(device, "Initialized %s: mmu=%s pagetable_count=%d\n",
device->name, kgsl_mmu_enabled() ? "on" : "off",
kgsl_pagetable_count);
return result;
-err_putprocess:
- kgsl_put_process_private(device, dev_priv->process_priv);
+err_stop:
+ mutex_lock(&device->mutex);
+ device->open_count--;
+ if (device->open_count == 0) {
+ result = device->ftbl->stop(device);
+ kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+ }
err_freedevpriv:
+ mutex_unlock(&device->mutex);
filep->private_data = NULL;
kfree(dev_priv);
err_pmruntime:
@@ -765,7 +875,7 @@
{
struct rb_node *node = private->mem_rb.rb_node;
- if (!kgsl_mmu_gpuaddr_in_range(gpuaddr))
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
return NULL;
while (node != NULL) {
@@ -798,6 +908,77 @@
return kgsl_sharedmem_find_region(private, gpuaddr, 1);
}
+/**
+ * kgsl_sharedmem_region_empty - Check if an address region is empty
+ *
+ * @private: private data for the process to check.
+ * @gpuaddr: start address of the region
+ * @size: length of the region.
+ *
+ * Checks that there are no existing allocations within an address
+ * region. Note that unlike other kgsl_sharedmem* search functions,
+ * this one manages locking on its own.
+ */
+int
+kgsl_sharedmem_region_empty(struct kgsl_process_private *private,
+ unsigned int gpuaddr, size_t size)
+{
+ int result = 1;
+ unsigned int gpuaddr_end = gpuaddr + size;
+
+ struct rb_node *node = private->mem_rb.rb_node;
+
+ if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
+ return 0;
+
+ /* don't overflow */
+ if (gpuaddr_end < gpuaddr)
+ return 0;
+
+ spin_lock(&private->mem_lock);
+ node = private->mem_rb.rb_node;
+ while (node != NULL) {
+ struct kgsl_mem_entry *entry;
+ unsigned int memdesc_start, memdesc_end;
+
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+
+ memdesc_start = entry->memdesc.gpuaddr;
+ memdesc_end = memdesc_start
+ + kgsl_memdesc_mmapsize(&entry->memdesc);
+
+ if (gpuaddr_end <= memdesc_start)
+ node = node->rb_left;
+ else if (memdesc_end <= gpuaddr)
+ node = node->rb_right;
+ else {
+ result = 0;
+ break;
+ }
+ }
+ spin_unlock(&private->mem_lock);
+ return result;
+}
+
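/*
 * The descent above is a standard interval-disjointness walk over
 * half-open ranges: [a, b) and [c, d) overlap iff a < d && c < b.
 * Stated directly (illustrative):
 *
 *   gpuaddr_end <= memdesc_start  ->  region lies wholly left, go left
 *   memdesc_end <= gpuaddr        ->  region lies wholly right, go right
 *   otherwise                     ->  overlap, the region is not empty
 */
static int ranges_overlap(unsigned int a, unsigned int b,
			  unsigned int c, unsigned int d)
{
	/* e.g. [0x1000, 0x2000) vs [0x1800, 0x3000) overlap */
	return a < d && c < b;
}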
+/**
+ * kgsl_sharedmem_find_id - find a memory entry by id
+ * @process: the owning process
+ * @id: id to find
+ *
+ * @returns - the mem_entry or NULL
+ */
+static inline struct kgsl_mem_entry *
+kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id)
+{
+ struct kgsl_mem_entry *entry;
+
+ rcu_read_lock();
+ entry = idr_find(&process->mem_idr, id);
+ rcu_read_unlock();
+
+ return entry;
+}
+
/*call all ioctl sub functions with driver locked*/
static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
@@ -934,11 +1115,8 @@
int result;
context = kgsl_find_context(dev_priv, param->context_id);
- if (context == NULL) {
- KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n",
- param->context_id);
+ if (context == NULL)
return -EINVAL;
- }
/*
* A reference count is needed here, because waittimestamp may
* block with the device mutex unlocked and userspace could
@@ -955,6 +1133,7 @@
unsigned int cmd, void *data)
{
int result = 0;
+ int i = 0;
struct kgsl_ringbuffer_issueibcmds *param = data;
struct kgsl_ibdesc *ibdesc;
struct kgsl_context *context;
@@ -962,20 +1141,11 @@
context = kgsl_find_context(dev_priv, param->drawctxt_id);
if (context == NULL) {
result = -EINVAL;
- KGSL_DRV_ERR(dev_priv->device,
- "invalid context_id %d\n",
- param->drawctxt_id);
goto done;
}
if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) {
- KGSL_DRV_INFO(dev_priv->device,
- "Using IB list mode for ib submission, numibs: %d\n",
- param->numibs);
if (!param->numibs) {
- KGSL_DRV_ERR(dev_priv->device,
- "Invalid numibs as parameter: %d\n",
- param->numibs);
result = -EINVAL;
goto done;
}
@@ -986,9 +1156,6 @@
*/
if (param->numibs > 10000) {
- KGSL_DRV_ERR(dev_priv->device,
- "Too many IBs submitted. count: %d max 10000\n",
- param->numibs);
result = -EINVAL;
goto done;
}
@@ -1028,6 +1195,18 @@
param->numibs = 1;
}
+ for (i = 0; i < param->numibs; i++) {
+ struct kgsl_pagetable *pt = dev_priv->process_priv->pagetable;
+
+ if (!kgsl_mmu_gpuaddr_in_range(pt, ibdesc[i].gpuaddr)) {
+ result = -ERANGE;
+ KGSL_DRV_ERR(dev_priv->device,
+ "invalid ib base GPU virtual addr %x\n",
+ ibdesc[i].gpuaddr);
+ goto free_ibdesc;
+ }
+ }
+
result = dev_priv->device->ftbl->issueibcmds(dev_priv,
context,
ibdesc,
@@ -1035,8 +1214,6 @@
&param->timestamp,
param->flags);
- trace_kgsl_issueibcmds(dev_priv->device, param, ibdesc, result);
-
free_ibdesc:
kfree(ibdesc);
done:
@@ -1075,11 +1252,8 @@
struct kgsl_context *context;
context = kgsl_find_context(dev_priv, param->context_id);
- if (context == NULL) {
- KGSL_DRV_ERR(dev_priv->device, "invalid context_id %d\n",
- param->context_id);
+ if (context == NULL)
return -EINVAL;
- }
return _cmdstream_readtimestamp(dev_priv, context,
param->type, &param->timestamp);
@@ -1089,9 +1263,6 @@
void *priv, u32 id, u32 timestamp)
{
struct kgsl_mem_entry *entry = priv;
- spin_lock(&entry->priv->mem_lock);
- rb_erase(&entry->node, &entry->priv->mem_rb);
- spin_unlock(&entry->priv->mem_lock);
trace_kgsl_mem_timestamp_free(device, entry, id, timestamp, 0);
kgsl_mem_entry_detach_process(entry);
}
@@ -1144,11 +1315,8 @@
struct kgsl_context *context;
context = kgsl_find_context(dev_priv, param->context_id);
- if (context == NULL) {
- KGSL_DRV_ERR(dev_priv->device,
- "invalid drawctxt context_id %d\n", param->context_id);
+ if (context == NULL)
return -EINVAL;
- }
return _cmdstream_freememontimestamp(dev_priv, param->gpuaddr,
context, param->timestamp, param->type);
@@ -1163,22 +1331,22 @@
context = kgsl_create_context(dev_priv);
- if (context == NULL) {
- result = -ENOMEM;
+ if (IS_ERR(context)) {
+ result = PTR_ERR(context);
goto done;
}
if (dev_priv->device->ftbl->drawctxt_create) {
result = dev_priv->device->ftbl->drawctxt_create(
dev_priv->device, dev_priv->process_priv->pagetable,
- context, param->flags);
+ context, &param->flags);
if (result)
goto done;
}
trace_kgsl_context_create(dev_priv->device, context, param->flags);
param->drawctxt_id = context->id;
done:
- if (result && context)
+ if (result && !IS_ERR(context))
kgsl_context_detach(context);
return result;
@@ -1206,27 +1374,52 @@
static long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
- int result = 0;
struct kgsl_sharedmem_free *param = data;
struct kgsl_process_private *private = dev_priv->process_priv;
struct kgsl_mem_entry *entry = NULL;
spin_lock(&private->mem_lock);
entry = kgsl_sharedmem_find(private, param->gpuaddr);
- if (entry)
- rb_erase(&entry->node, &private->mem_rb);
-
spin_unlock(&private->mem_lock);
- if (entry) {
- trace_kgsl_mem_free(entry);
- kgsl_mem_entry_detach_process(entry);
- } else {
- KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
- result = -EINVAL;
+ if (!entry) {
+ KGSL_MEM_INFO(dev_priv->device, "invalid gpuaddr %08x\n",
+ param->gpuaddr);
+ return -EINVAL;
}
+ trace_kgsl_mem_free(entry);
- return result;
+ kgsl_memfree_hist_set_event(entry->priv->pid,
+ entry->memdesc.gpuaddr,
+ entry->memdesc.size,
+ entry->memdesc.flags);
+
+ kgsl_mem_entry_detach_process(entry);
+ return 0;
+}
+
+static long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ struct kgsl_gpumem_free_id *param = data;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry = NULL;
+
+ entry = kgsl_sharedmem_find_id(private, param->id);
+
+ if (!entry) {
+ KGSL_MEM_INFO(dev_priv->device, "invalid id %d\n", param->id);
+ return -EINVAL;
+ }
+ trace_kgsl_mem_free(entry);
+
+ kgsl_memfree_hist_set_event(entry->priv->pid,
+ entry->memdesc.gpuaddr,
+ entry->memdesc.size,
+ entry->memdesc.flags);
+
+ kgsl_mem_entry_detach_process(entry);
+ return 0;
}
static struct vm_area_struct *kgsl_get_vma_from_start_addr(unsigned int addr)
@@ -1257,11 +1450,10 @@
dev_t rdev;
struct fb_info *info;
+ *start = 0;
+ *vstart = 0;
+ *len = 0;
*filep = NULL;
-#ifdef CONFIG_ANDROID_PMEM
- if (!get_pmem_file(fd, start, vstart, len, filep))
- return 0;
-#endif
fbfile = fget(fd);
if (fbfile == NULL) {
@@ -1302,10 +1494,8 @@
ret = -ERANGE;
- if (phys == 0) {
- KGSL_CORE_ERR("kgsl_get_phys_file returned phys=0\n");
+ if (phys == 0)
goto err;
- }
/* Make sure the length of the region, the offset and the desired
* size are all page aligned or bail
@@ -1313,19 +1503,13 @@
if ((len & ~PAGE_MASK) ||
(offset & ~PAGE_MASK) ||
(size & ~PAGE_MASK)) {
- KGSL_CORE_ERR("length %lu, offset %u or size %u "
- "is not page aligned\n",
- len, offset, size);
+ KGSL_CORE_ERR("length offset or size is not page aligned\n");
goto err;
}
/* The size or offset can never be greater than the PMEM length */
- if (offset >= len || size > len) {
- KGSL_CORE_ERR("offset %u or size %u "
- "exceeds pmem length %lu\n",
- offset, size, len);
+ if (offset >= len || size > len)
goto err;
- }
/* If size is 0, then adjust it to default to the size of the region
* minus the offset. If size isn't zero, then make sure that it will
@@ -1343,6 +1527,8 @@
entry->memdesc.size = size;
entry->memdesc.physaddr = phys + offset;
entry->memdesc.hostptr = (void *) (virt + offset);
+ /* USE_CPU_MAP is not implemented for PMEM. */
+ entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
ret = memdesc_sg_phys(&entry->memdesc, phys + offset, size);
if (ret)
@@ -1350,18 +1536,14 @@
return 0;
err:
-#ifdef CONFIG_ANDROID_PMEM
- put_pmem_file(filep);
-#endif
return ret;
}
static int memdesc_sg_virt(struct kgsl_memdesc *memdesc,
- void *addr, int size)
+ unsigned long paddr, int size)
{
int i;
int sglen = PAGE_ALIGN(size) / PAGE_SIZE;
- unsigned long paddr = (unsigned long) addr;
memdesc->sg = kgsl_sg_alloc(sglen);
@@ -1412,34 +1594,33 @@
return -EINVAL;
}
-static int kgsl_setup_hostptr(struct kgsl_mem_entry *entry,
+static int kgsl_setup_useraddr(struct kgsl_mem_entry *entry,
struct kgsl_pagetable *pagetable,
- void *hostptr, unsigned int offset,
+ unsigned long useraddr, unsigned int offset,
size_t size)
{
struct vm_area_struct *vma;
unsigned int len;
down_read(&current->mm->mmap_sem);
- vma = find_vma(current->mm, (unsigned int) hostptr);
+ vma = find_vma(current->mm, useraddr);
up_read(&current->mm->mmap_sem);
if (!vma) {
- KGSL_CORE_ERR("find_vma(%p) failed\n", hostptr);
+ KGSL_CORE_ERR("find_vma(%lx) failed\n", useraddr);
return -EINVAL;
}
/* We don't necessarily start at vma->vm_start */
- len = vma->vm_end - (unsigned long) hostptr;
+ len = vma->vm_end - useraddr;
if (offset >= len)
return -EINVAL;
- if (!KGSL_IS_PAGE_ALIGNED((unsigned long) hostptr) ||
+ if (!KGSL_IS_PAGE_ALIGNED(useraddr) ||
!KGSL_IS_PAGE_ALIGNED(len)) {
- KGSL_CORE_ERR("user address len(%u)"
- "and start(%p) must be page"
- "aligned\n", len, hostptr);
+ KGSL_CORE_ERR("bad alignment: start(%lx) len(%u)\n",
+ useraddr, len);
return -EINVAL;
}
@@ -1460,28 +1641,29 @@
entry->memdesc.pagetable = pagetable;
entry->memdesc.size = size;
- entry->memdesc.hostptr = hostptr + (offset & PAGE_MASK);
+ entry->memdesc.useraddr = useraddr + (offset & PAGE_MASK);
+ if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
+ entry->memdesc.gpuaddr = entry->memdesc.useraddr;
- return memdesc_sg_virt(&entry->memdesc,
- hostptr + (offset & PAGE_MASK), size);
+ return memdesc_sg_virt(&entry->memdesc, entry->memdesc.useraddr,
+ size);
}
#ifdef CONFIG_ASHMEM
static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry,
struct kgsl_pagetable *pagetable,
- int fd, void *hostptr, size_t size)
+ int fd, unsigned long useraddr, size_t size)
{
int ret;
struct vm_area_struct *vma;
struct file *filep, *vmfile;
unsigned long len;
- unsigned int hostaddr = (unsigned int) hostptr;
- vma = kgsl_get_vma_from_start_addr(hostaddr);
+ vma = kgsl_get_vma_from_start_addr(useraddr);
if (vma == NULL)
return -EINVAL;
- if (vma->vm_pgoff || vma->vm_start != hostaddr) {
+ if (vma->vm_pgoff || vma->vm_start != useraddr) {
KGSL_CORE_ERR("Invalid vma region\n");
return -EINVAL;
}
@@ -1492,8 +1674,8 @@
size = len;
if (size != len) {
- KGSL_CORE_ERR("Invalid size %d for vma region %p\n",
- size, hostptr);
+ KGSL_CORE_ERR("Invalid size %d for vma region %lx\n",
+ size, useraddr);
return -EINVAL;
}
@@ -1513,9 +1695,11 @@
entry->priv_data = filep;
entry->memdesc.pagetable = pagetable;
entry->memdesc.size = ALIGN(size, PAGE_SIZE);
- entry->memdesc.hostptr = hostptr;
+ entry->memdesc.useraddr = useraddr;
+ if (kgsl_memdesc_use_cpu_map(&entry->memdesc))
+ entry->memdesc.gpuaddr = entry->memdesc.useraddr;
- ret = memdesc_sg_virt(&entry->memdesc, hostptr, size);
+ ret = memdesc_sg_virt(&entry->memdesc, useraddr, size);
if (ret)
goto err;
@@ -1528,18 +1712,23 @@
#else
static int kgsl_setup_ashmem(struct kgsl_mem_entry *entry,
struct kgsl_pagetable *pagetable,
- int fd, void *hostptr, size_t size)
+ int fd, unsigned long useraddr, size_t size)
{
return -EINVAL;
}
#endif
static int kgsl_setup_ion(struct kgsl_mem_entry *entry,
- struct kgsl_pagetable *pagetable, int fd)
+ struct kgsl_pagetable *pagetable, void *data)
{
struct ion_handle *handle;
struct scatterlist *s;
struct sg_table *sg_table;
+ struct kgsl_map_user_mem *param = data;
+ int fd = param->fd;
+
+ if (!param->len)
+ return -EINVAL;
if (IS_ERR_OR_NULL(kgsl_ion_client))
return -ENODEV;
@@ -1554,6 +1743,8 @@
entry->priv_data = handle;
entry->memdesc.pagetable = pagetable;
entry->memdesc.size = 0;
+ /* USE_CPU_MAP is not implemented for ION. */
+ entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
sg_table = ion_sg_table(kgsl_ion_client, handle);
@@ -1571,6 +1762,8 @@
entry->memdesc.sglen++;
}
+ entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size);
+
return 0;
err:
ion_free(kgsl_ion_client, handle);
@@ -1596,7 +1789,20 @@
else
memtype = param->memtype;
+ /*
+ * Mask off unknown flags from userspace. This way the caller can
+ * check if a flag is supported by looking at the returned flags.
+ * Note: CACHEMODE is ignored for this call. Caching should be
+ * determined by type of allocation being mapped.
+ */
+ param->flags &= KGSL_MEMFLAGS_GPUREADONLY
+ | KGSL_MEMTYPE_MASK
+ | KGSL_MEMALIGN_MASK
+ | KGSL_MEMFLAGS_USE_CPU_MAP;
+
entry->memdesc.flags = param->flags;
+ if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
+ entry->memdesc.flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
switch (memtype) {
case KGSL_USER_MEM_TYPE_PMEM:
@@ -1622,8 +1828,8 @@
if (param->hostptr == 0)
break;
- result = kgsl_setup_hostptr(entry, private->pagetable,
- (void *) param->hostptr,
+ result = kgsl_setup_useraddr(entry, private->pagetable,
+ param->hostptr,
param->offset, param->len);
entry->memtype = KGSL_MEM_ENTRY_USER;
break;
@@ -1640,14 +1846,13 @@
break;
result = kgsl_setup_ashmem(entry, private->pagetable,
- param->fd, (void *) param->hostptr,
+ param->fd, param->hostptr,
param->len);
entry->memtype = KGSL_MEM_ENTRY_ASHMEM;
break;
case KGSL_USER_MEM_TYPE_ION:
- result = kgsl_setup_ion(entry, private->pagetable,
- param->fd);
+ result = kgsl_setup_ion(entry, private->pagetable, data);
break;
default:
KGSL_CORE_ERR("Invalid memory type: %x\n", memtype);
@@ -1662,27 +1867,31 @@
else if (entry->memdesc.size >= SZ_64K)
kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64));
- result = kgsl_mmu_map(private->pagetable,
- &entry->memdesc,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-
+ result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
if (result)
goto error_put_file_ptr;
/* Adjust the returned value for a non 4k aligned offset */
param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK);
+ /* echo back flags */
+ param->flags = entry->memdesc.flags;
+
+ result = kgsl_mem_entry_attach_process(entry, private);
+ if (result)
+ goto error_unmap;
KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped,
kgsl_driver.stats.mapped_max);
kgsl_process_add_stats(private, entry->memtype, param->len);
- kgsl_mem_entry_attach_process(entry, private);
trace_kgsl_mem_map(entry, param->fd);
kgsl_check_idle(dev_priv->device);
return result;
+error_unmap:
+ kgsl_mmu_unmap(private->pagetable, &entry->memdesc);
error_put_file_ptr:
switch (entry->memtype) {
case KGSL_MEM_ENTRY_PMEM:
@@ -1702,33 +1911,136 @@
return result;
}
-/*This function flushes a graphics memory allocation from CPU cache
- *when caching is enabled with MMU*/
+static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, int op)
+{
+ int ret = 0;
+ int cacheop;
+ int mode;
+
+ /*
+ * Flush is defined as (clean | invalidate). If both bits are set, then
+ * do a flush, otherwise check for the individual bits and clean or inv
+ * as requested
+ */
+
+ if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH)
+ cacheop = KGSL_CACHE_OP_FLUSH;
+ else if (op & KGSL_GPUMEM_CACHE_CLEAN)
+ cacheop = KGSL_CACHE_OP_CLEAN;
+ else if (op & KGSL_GPUMEM_CACHE_INV)
+ cacheop = KGSL_CACHE_OP_INV;
+ else {
+ ret = -EINVAL;
+ goto done;
+ }
+
+ mode = kgsl_memdesc_get_cachemode(&entry->memdesc);
+ if (mode != KGSL_CACHEMODE_UNCACHED
+ && mode != KGSL_CACHEMODE_WRITECOMBINE) {
+ trace_kgsl_mem_sync_cache(entry, op);
+ kgsl_cache_range_op(&entry->memdesc, cacheop);
+ }
+
+done:
+ return ret;
+}
+
+/* New cache sync function - supports both directions (clean and invalidate) */
+
+static long
+kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ struct kgsl_gpumem_sync_cache *param = data;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry = NULL;
+
+ if (param->id != 0) {
+ entry = kgsl_sharedmem_find_id(private, param->id);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n",
+ param->id);
+ return -EINVAL;
+ }
+ } else if (param->gpuaddr != 0) {
+ spin_lock(&private->mem_lock);
+ entry = kgsl_sharedmem_find(private, param->gpuaddr);
+ spin_unlock(&private->mem_lock);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device,
+ "can't find gpuaddr %x\n",
+ param->gpuaddr);
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return _kgsl_gpumem_sync_cache(entry, param->op);
+}
+
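/*
 * Matching userspace call, sketched under the same assumptions as the
 * free_gpumem() example earlier. Leaving gpuaddr at 0 selects the id
 * lookup path above; KGSL_GPUMEM_CACHE_FLUSH is defined as
 * clean | invalidate, which is why the kernel tests the combined value
 * before the individual bits.
 */
static int sync_gpumem_by_id(int fd, unsigned int id, unsigned int op)
{
	struct kgsl_gpumem_sync_cache req;

	memset(&req, 0, sizeof(req));
	req.id = id;
	req.op = op;	/* e.g. KGSL_GPUMEM_CACHE_FLUSH */
	return ioctl(fd, IOCTL_KGSL_GPUMEM_SYNC_CACHE, &req);
}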
+/* Legacy cache function, does a flush (clean + invalidate) */
+
static long
kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
- int result = 0;
- struct kgsl_mem_entry *entry;
struct kgsl_sharedmem_free *param = data;
struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry = NULL;
spin_lock(&private->mem_lock);
entry = kgsl_sharedmem_find(private, param->gpuaddr);
- if (!entry) {
- KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
- result = -EINVAL;
- goto done;
- }
- if (!entry->memdesc.hostptr) {
- KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n",
- param->gpuaddr);
- goto done;
+ spin_unlock(&private->mem_lock);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device,
+ "can't find gpuaddr %x\n",
+ param->gpuaddr);
+ return -EINVAL;
}
- kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN);
-done:
- spin_unlock(&private->mem_lock);
+ return _kgsl_gpumem_sync_cache(entry, KGSL_GPUMEM_CACHE_FLUSH);
+}
+
+/*
+ * The common parts of kgsl_ioctl_gpumem_alloc and kgsl_ioctl_gpumem_alloc_id.
+ */
+int
+_gpumem_alloc(struct kgsl_device_private *dev_priv,
+ struct kgsl_mem_entry **ret_entry,
+ unsigned int size, unsigned int flags)
+{
+ int result;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry;
+
+ /*
+ * Mask off unknown flags from userspace. This way the caller can
+ * check if a flag is supported by looking at the returned flags.
+ */
+ flags &= KGSL_MEMFLAGS_GPUREADONLY
+ | KGSL_CACHEMODE_MASK
+ | KGSL_MEMTYPE_MASK
+ | KGSL_MEMALIGN_MASK
+ | KGSL_MEMFLAGS_USE_CPU_MAP;
+
+ entry = kgsl_mem_entry_create();
+ if (entry == NULL)
+ return -ENOMEM;
+
+ result = kgsl_allocate_user(&entry->memdesc, private->pagetable, size,
+ flags);
+ if (result != 0)
+ goto err;
+
+ entry->memtype = KGSL_MEM_ENTRY_KERNEL;
+
+ kgsl_check_idle(dev_priv->device);
+ *ret_entry = entry;
+ return result;
+err:
+ kfree(entry);
+ *ret_entry = NULL;
return result;
}
@@ -1738,29 +2050,115 @@
{
struct kgsl_process_private *private = dev_priv->process_priv;
struct kgsl_gpumem_alloc *param = data;
- struct kgsl_mem_entry *entry;
+ struct kgsl_mem_entry *entry = NULL;
int result;
- entry = kgsl_mem_entry_create();
- if (entry == NULL)
- return -ENOMEM;
+ param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
+ result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
+ if (result)
+ return result;
- result = kgsl_allocate_user(&entry->memdesc, private->pagetable,
- param->size, param->flags);
+ result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
+ if (result)
+ goto err;
- if (result == 0) {
- entry->memtype = KGSL_MEM_ENTRY_KERNEL;
- kgsl_mem_entry_attach_process(entry, private);
- param->gpuaddr = entry->memdesc.gpuaddr;
+ result = kgsl_mem_entry_attach_process(entry, private);
+ if (result != 0)
+ goto err;
- kgsl_process_add_stats(private, entry->memtype, param->size);
- trace_kgsl_mem_alloc(entry);
- } else
- kfree(entry);
+ kgsl_process_add_stats(private, entry->memtype, param->size);
+ trace_kgsl_mem_alloc(entry);
- kgsl_check_idle(dev_priv->device);
+ param->gpuaddr = entry->memdesc.gpuaddr;
+ param->size = entry->memdesc.size;
+ param->flags = entry->memdesc.flags;
+ return result;
+err:
+ kgsl_sharedmem_free(&entry->memdesc);
+ kfree(entry);
return result;
}
+
+static long
+kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_gpumem_alloc_id *param = data;
+ struct kgsl_mem_entry *entry = NULL;
+ int result;
+
+ if (!kgsl_mmu_use_cpu_map(private->pagetable->mmu))
+ param->flags &= ~KGSL_MEMFLAGS_USE_CPU_MAP;
+
+ result = _gpumem_alloc(dev_priv, &entry, param->size, param->flags);
+ if (result != 0)
+ goto err;
+
+ if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+ result = kgsl_mmu_map(private->pagetable, &entry->memdesc);
+ if (result)
+ goto err;
+ }
+
+ result = kgsl_mem_entry_attach_process(entry, private);
+ if (result != 0)
+ goto err;
+
+ kgsl_process_add_stats(private, entry->memtype, param->size);
+ trace_kgsl_mem_alloc(entry);
+
+ param->id = entry->id;
+ param->flags = entry->memdesc.flags;
+ param->size = entry->memdesc.size;
+ param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
+ param->gpuaddr = entry->memdesc.gpuaddr;
+ return result;
+err:
+ if (entry)
+ kgsl_sharedmem_free(&entry->memdesc);
+ kfree(entry);
+ return result;
+}
+
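/*
 * Sketch of the flags echo-back convention noted in _gpumem_alloc()
 * (same assumptions as the earlier userspace examples, plus <stdio.h>):
 * the kernel masks off flags it does not support and writes the result
 * back, so the caller can compare requested vs. returned flags.
 */
static int alloc_gpumem_id(int fd, unsigned int size, unsigned int flags,
			   struct kgsl_gpumem_alloc_id *out)
{
	memset(out, 0, sizeof(*out));
	out->size = size;
	out->flags = flags;
	if (ioctl(fd, IOCTL_KGSL_GPUMEM_ALLOC_ID, out))
		return -1;
	if ((flags & KGSL_MEMFLAGS_USE_CPU_MAP) &&
	    !(out->flags & KGSL_MEMFLAGS_USE_CPU_MAP)) {
		/* CPU-map aliasing unavailable; fall back to out->gpuaddr */
	}
	return 0;
}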
+static long
+kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_gpumem_get_info *param = data;
+ struct kgsl_mem_entry *entry = NULL;
+ int result = 0;
+
+ if (param->id != 0) {
+ entry = kgsl_sharedmem_find_id(private, param->id);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n",
+ param->id);
+ return -EINVAL;
+ }
+ } else if (param->gpuaddr != 0) {
+ spin_lock(&private->mem_lock);
+ entry = kgsl_sharedmem_find(private, param->gpuaddr);
+ spin_unlock(&private->mem_lock);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device,
+ "can't find gpuaddr %lx\n",
+ param->gpuaddr);
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+ param->gpuaddr = entry->memdesc.gpuaddr;
+ param->id = entry->id;
+ param->flags = entry->memdesc.flags;
+ param->size = entry->memdesc.size;
+ param->mmapsize = kgsl_memdesc_mmapsize(&entry->memdesc);
+ param->useraddr = entry->memdesc.useraddr;
+ return result;
+}
+
static long kgsl_ioctl_cff_syncmem(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
@@ -1924,7 +2322,7 @@
static const struct {
unsigned int cmd;
kgsl_ioctl_func_t func;
- int flags;
+ unsigned int flags;
} kgsl_ioctl_funcs[] = {
KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
kgsl_ioctl_device_getproperty,
@@ -1975,7 +2373,15 @@
KGSL_IOCTL_LOCK),
KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY,
kgsl_ioctl_device_setproperty,
- KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE)
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+ KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID,
+ kgsl_ioctl_gpumem_alloc_id, 0),
+ KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID,
+ kgsl_ioctl_gpumem_free_id, 0),
+ KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO,
+ kgsl_ioctl_gpumem_get_info, 0),
+ KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE,
+ kgsl_ioctl_gpumem_sync_cache, 0),
};
static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -2067,7 +2473,11 @@
mutex_unlock(&dev_priv->device->mutex);
}
- if (ret == 0 && (cmd & IOC_OUT)) {
+ /*
+ * Still copy back on failure, but assume the function took
+ * all necessary precautions when sanitizing the return values.
+ */
+ if (cmd & IOC_OUT) {
if (copy_to_user((void __user *) arg, uptr, _IOC_SIZE(cmd)))
ret = -EFAULT;
}
@@ -2135,6 +2545,8 @@
kgsl_gpumem_vm_close(struct vm_area_struct *vma)
{
struct kgsl_mem_entry *entry = vma->vm_private_data;
+
+ entry->memdesc.useraddr = 0;
kgsl_mem_entry_put(entry);
}
@@ -2144,8 +2556,145 @@
.close = kgsl_gpumem_vm_close,
};
+static int
+get_mmap_entry(struct kgsl_process_private *private,
+ struct kgsl_mem_entry **out_entry, unsigned long pgoff,
+ unsigned long len)
+{
+ int ret = -EINVAL;
+ struct kgsl_mem_entry *entry;
+
+ entry = kgsl_sharedmem_find_id(private, pgoff);
+ if (entry == NULL) {
+ spin_lock(&private->mem_lock);
+ entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT);
+ spin_unlock(&private->mem_lock);
+ }
+
+ if (!entry)
+ return -EINVAL;
+
+ kgsl_mem_entry_get(entry);
+
+ if (!entry->memdesc.ops ||
+ !entry->memdesc.ops->vmflags ||
+ !entry->memdesc.ops->vmfault) {
+ ret = -EINVAL;
+ goto err_put;
+ }
+
+ if (entry->memdesc.useraddr != 0) {
+ ret = -EBUSY;
+ goto err_put;
+ }
+
+ if (len != kgsl_memdesc_mmapsize(&entry->memdesc)) {
+ ret = -ERANGE;
+ goto err_put;
+ }
+
+ *out_entry = entry;
+ return 0;
+err_put:
+ kgsl_mem_entry_put(entry);
+ return ret;
+}
+
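/*
 * Mapping sketch (assumption: for id-backed buffers the mmap offset
 * encodes the buffer id, since get_mmap_entry() above tries pgoff as an
 * id first and as gpuaddr >> PAGE_SHIFT second). The length must equal
 * kgsl_memdesc_mmapsize() or the kernel returns -ERANGE.
 */
#include <sys/mman.h>
#include <unistd.h>

static void *map_gpumem(int fd, const struct kgsl_gpumem_alloc_id *info)
{
	off_t offset = (off_t)info->id * sysconf(_SC_PAGESIZE);

	return mmap(NULL, info->mmapsize, PROT_READ | PROT_WRITE,
		    MAP_SHARED, fd, offset);
}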
+static unsigned long
+kgsl_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+ unsigned long ret = 0;
+ unsigned long vma_offset = pgoff << PAGE_SHIFT;
+ struct kgsl_device_private *dev_priv = file->private_data;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_device *device = dev_priv->device;
+ struct kgsl_mem_entry *entry = NULL;
+ unsigned int align;
+ unsigned int retry = 0;
+
+ if (vma_offset == device->memstore.gpuaddr)
+ return get_unmapped_area(NULL, addr, len, pgoff, flags);
+
+ ret = get_mmap_entry(private, &entry, pgoff, len);
+ if (ret)
+ return ret;
+
+ if (!kgsl_memdesc_use_cpu_map(&entry->memdesc) || (flags & MAP_FIXED)) {
+ /*
+ * If we're not going to use the same mapping on the gpu,
+ * any address is fine.
+ * For MAP_FIXED, hopefully the caller knows what they're doing,
+ * but we may fail in mmap() if there is already something
+ * at the virtual address chosen.
+ */
+ ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
+ goto put;
+ }
+ if (entry->memdesc.gpuaddr != 0) {
+ KGSL_MEM_INFO(device,
+ "pgoff %lx already mapped to gpuaddr %x\n",
+ pgoff, entry->memdesc.gpuaddr);
+ ret = -EBUSY;
+ goto put;
+ }
+
+ align = kgsl_memdesc_get_align(&entry->memdesc);
+ if (align >= ilog2(SZ_1M))
+ align = ilog2(SZ_1M);
+ else if (align >= ilog2(SZ_64K))
+ align = ilog2(SZ_64K);
+ else if (align <= PAGE_SHIFT)
+ align = 0;
+
+ if (align)
+ len += 1 << align;
+ do {
+ ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
+ if (IS_ERR_VALUE(ret))
+ break;
+ if (align)
+ ret = ALIGN(ret, (1 << align));
+
+ /* make sure there isn't a GPU-only mapping at this address */
+ if (kgsl_sharedmem_region_empty(private, ret, len))
+ break;
+
+ trace_kgsl_mem_unmapped_area_collision(entry, addr, len, ret);
+
+ /*
+ * If we collided, bump the hint address so that
+ * get_umapped_area knows to look somewhere else.
+ */
+ addr = (addr == 0) ? ret + len : addr + len;
+
+ /*
+ * The addr hint can be set by userspace to be near
+ * the end of the address space. Make sure we search
+ * the whole address space at least once by wrapping
+ * back around once.
+ */
+ if (!retry && (addr + len >= TASK_SIZE)) {
+ addr = 0;
+ retry = 1;
+ } else {
+ ret = -EBUSY;
+ }
+ } while (addr + len < TASK_SIZE);
+
+ if (IS_ERR_VALUE(ret))
+ KGSL_MEM_INFO(device,
+ "pid %d pgoff %lx len %ld failed error %ld\n",
+ private->pid, pgoff, len, ret);
+put:
+ kgsl_mem_entry_put(entry);
+ return ret;
+}
+
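/*
 * The over-allocation trick above, restated (illustrative): searching
 * for len + (1 << align) bytes guarantees the enlarged window contains
 * an aligned start for the original length. E.g. with 64K alignment:
 *
 *   get_unmapped_area() -> 0x40011000
 *   align_up(0x40011000, 16) == 0x40020000, still inside the window
 */
static unsigned long align_up(unsigned long addr, unsigned int align_log2)
{
	unsigned long mask = (1UL << align_log2) - 1;

	return (addr + mask) & ~mask;
}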
static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
{
+ unsigned int ret, cache;
unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT;
struct kgsl_device_private *dev_priv = file->private_data;
struct kgsl_process_private *private = dev_priv->process_priv;
@@ -2157,31 +2706,76 @@
if (vma_offset == device->memstore.gpuaddr)
return kgsl_mmap_memstore(device, vma);
- /* Find a chunk of GPU memory */
+ ret = get_mmap_entry(private, &entry, vma->vm_pgoff,
+ vma->vm_end - vma->vm_start);
+ if (ret)
+ return ret;
- spin_lock(&private->mem_lock);
- entry = kgsl_sharedmem_find(private, vma_offset);
+ if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+ entry->memdesc.gpuaddr = vma->vm_start;
- if (entry)
- kgsl_mem_entry_get(entry);
-
- spin_unlock(&private->mem_lock);
-
- if (entry == NULL)
- return -EINVAL;
-
- if (!entry->memdesc.ops ||
- !entry->memdesc.ops->vmflags ||
- !entry->memdesc.ops->vmfault)
- return -EINVAL;
+ ret = kgsl_mmu_map(private->pagetable, &entry->memdesc);
+ if (ret) {
+ kgsl_mem_entry_put(entry);
+ return ret;
+ }
+ kgsl_mem_entry_track_gpuaddr(private, entry);
+ }
vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc);
vma->vm_private_data = entry;
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ /* Determine user-side caching policy */
+
+ cache = kgsl_memdesc_get_cachemode(&entry->memdesc);
+
+ switch (cache) {
+ case KGSL_CACHEMODE_UNCACHED:
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITETHROUGH:
+ vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITEBACK:
+ vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITECOMBINE:
+ default:
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ break;
+ }
+
vma->vm_ops = &kgsl_gpumem_vm_ops;
+
+ if (cache == KGSL_CACHEMODE_WRITEBACK
+ || cache == KGSL_CACHEMODE_WRITETHROUGH) {
+ struct scatterlist *s;
+ int i;
+ int sglen = entry->memdesc.sglen;
+ unsigned long addr = vma->vm_start;
+
+ /* don't map in the guard page, it should always fault */
+ if (kgsl_memdesc_has_guard_page(&entry->memdesc))
+ sglen--;
+
+ for_each_sg(entry->memdesc.sg, s, sglen, i) {
+ int j;
+ for (j = 0; j < (sg_dma_len(s) >> PAGE_SHIFT); j++) {
+ struct page *page = sg_page(s);
+ page = nth_page(page, j);
+ vm_insert_page(vma, addr, page);
+ addr += PAGE_SIZE;
+ }
+ }
+ }
+
vma->vm_file = file;
+ entry->memdesc.useraddr = vma->vm_start;
+
+ trace_kgsl_mem_mmap(entry);
+
return 0;
}
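/*
 * Requesting a cache mode from userspace, sketched under the assumption
 * that KGSL_CACHEMODE_SHIFT pairs with the KGSL_CACHEMODE_MASK used in
 * _gpumem_alloc(). Writeback/writethrough mappings are then
 * pre-populated with vm_insert_page() in the loop above, skipping the
 * guard page so that it still faults.
 */
unsigned int cached_flags = (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT)
				& KGSL_CACHEMODE_MASK;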
@@ -2198,6 +2792,7 @@
.release = kgsl_release,
.open = kgsl_open,
.mmap = kgsl_mmap,
+ .get_unmapped_area = kgsl_get_unmapped_area,
.unlocked_ioctl = kgsl_ioctl,
};
@@ -2205,6 +2800,8 @@
.process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex),
.ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock),
.devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock),
+ .memfree_hist_mutex =
+ __MUTEX_INITIALIZER(kgsl_driver.memfree_hist_mutex),
};
EXPORT_SYMBOL(kgsl_driver);
@@ -2287,6 +2884,7 @@
kgsl_ion_client = msm_ion_client_create(UINT_MAX, KGSL_NAME);
+ /* Get starting physical address of device registers */
res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
device->iomemname);
if (res == NULL) {
@@ -2304,6 +2902,33 @@
device->reg_phys = res->start;
device->reg_len = resource_size(res);
+ /*
+ * Check if a shadermemname is defined, and then get shader memory
+ * details including shader memory starting physical address
+ * and shader memory length
+ */
+ if (device->shadermemname != NULL) {
+ res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+ device->shadermemname);
+
+ if (res == NULL) {
+ KGSL_DRV_ERR(device,
+ "Shader memory: platform_get_resource_byname failed\n");
+ } else {
+ device->shader_mem_phys = res->start;
+ device->shader_mem_len = resource_size(res);
+ }
+
+ if (!devm_request_mem_region(device->dev,
+ device->shader_mem_phys,
+ device->shader_mem_len,
+ device->name)) {
+ KGSL_DRV_ERR(device, "request_mem_region_failed\n");
+ }
+ }
+
if (!devm_request_mem_region(device->dev, device->reg_phys,
device->reg_len, device->name)) {
KGSL_DRV_ERR(device, "request_mem_region failed\n");
@@ -2349,7 +2974,6 @@
if (result)
goto error_pwrctrl_close;
- kgsl_cffdump_open(device->id);
setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
status = kgsl_create_device_workqueue(device);
@@ -2371,7 +2995,8 @@
goto error_close_mmu;
}
- pm_qos_add_request(&device->pm_qos_req_dma, PM_QOS_CPU_DMA_LATENCY,
+ pm_qos_add_request(&device->pwrctrl.pm_qos_req_dma,
+ PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
/* Initialize the snapshot engine */
@@ -2421,11 +3046,15 @@
if (device->pm_dump_enable) {
KGSL_LOG_DUMP(device,
- "POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
- pwr->power_flags, pwr->active_pwrlevel);
+ "POWER: NAP ALLOWED = %d | START_STOP_SLEEP_WAKE = %d\n"
+ , pwr->nap_allowed, pwr->strtstp_sleepwake);
+
+ KGSL_LOG_DUMP(device,
+ "POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
+ pwr->power_flags, pwr->active_pwrlevel);
KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
- pwr->interval_timeout);
+ pwr->interval_timeout);
}
@@ -2473,10 +3102,9 @@
{
kgsl_device_snapshot_close(device);
- kgsl_cffdump_close(device->id);
kgsl_pwrctrl_uninit_sysfs(device);
- pm_qos_remove_request(&device->pm_qos_req_dma);
+ pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
idr_destroy(&device->context_idr);
@@ -2529,6 +3157,7 @@
kgsl_driver.class = NULL;
}
+ kgsl_memfree_hist_exit();
unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX);
}
@@ -2600,6 +3229,9 @@
goto err;
}
+ if (kgsl_memfree_hist_init())
+ KGSL_CORE_ERR("failed to init memfree_hist");
+
return 0;
err:
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 3935164..c568db5 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -71,6 +71,23 @@
#define KGSL_STATS_ADD(_size, _stat, _max) \
do { _stat += (_size); if (_stat > _max) _max = _stat; } while (0)
+
+#define KGSL_MEMFREE_HIST_SIZE ((int)(PAGE_SIZE * 2))
+
+struct kgsl_memfree_hist_elem {
+ unsigned int pid;
+ unsigned int gpuaddr;
+ unsigned int size;
+ unsigned int flags;
+};
+
+struct kgsl_memfree_hist {
+ void *base_hist_rb;
+ unsigned int size;
+ struct kgsl_memfree_hist_elem *wptr;
+};
+
+
struct kgsl_device;
struct kgsl_context;
@@ -99,6 +116,9 @@
void *ptpool;
+ struct mutex memfree_hist_mutex;
+ struct kgsl_memfree_hist memfree_hist;
+
struct {
unsigned int vmalloc;
unsigned int vmalloc_max;
@@ -129,13 +149,16 @@
#define KGSL_MEMDESC_GUARD_PAGE BIT(0)
/* Set if the memdesc is mapped into all pagetables */
#define KGSL_MEMDESC_GLOBAL BIT(1)
+/* The memdesc is frozen during a snapshot */
+#define KGSL_MEMDESC_FROZEN BIT(2)
/* shared memory allocation */
struct kgsl_memdesc {
struct kgsl_pagetable *pagetable;
- void *hostptr;
+ void *hostptr; /* kernel virtual address */
+ unsigned long useraddr; /* userspace address */
unsigned int gpuaddr;
- unsigned int physaddr;
+ phys_addr_t physaddr;
unsigned int size;
unsigned int priv; /* Internal flags and settings */
struct scatterlist *sg;
@@ -154,17 +177,13 @@
#define KGSL_MEM_ENTRY_ION 4
#define KGSL_MEM_ENTRY_MAX 5
-/* List of flags */
-
-#define KGSL_MEM_ENTRY_FROZEN (1 << 0)
-
struct kgsl_mem_entry {
struct kref refcount;
struct kgsl_memdesc memdesc;
int memtype;
- int flags;
void *priv_data;
struct rb_node node;
+ unsigned int id;
unsigned int context_id;
/* back pointer to private structure under whose context this
* allocation is made */
@@ -224,6 +243,10 @@
static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
unsigned int gpuaddr, unsigned int size)
{
+ /* don't overflow */
+ if ((gpuaddr + size) < gpuaddr)
+ return 0;
+
if (gpuaddr >= memdesc->gpuaddr &&
((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) {
return 1;
diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c
index e06c94d..6dc2ccc 100644
--- a/drivers/gpu/msm/kgsl_cffdump.c
+++ b/drivers/gpu/msm/kgsl_cffdump.c
@@ -28,6 +28,7 @@
#include "kgsl_log.h"
#include "kgsl_sharedmem.h"
#include "adreno_pm4types.h"
+#include "adreno.h"
static struct rchan *chan;
static struct dentry *dir;
@@ -334,7 +335,7 @@
return;
}
- kgsl_cff_dump_enable = 1;
+ kgsl_cff_dump_enable = 0;
spin_lock_init(&cffdump_lock);
@@ -356,10 +357,21 @@
debugfs_remove(dir);
}
-void kgsl_cffdump_open(enum kgsl_deviceid device_id)
+void kgsl_cffdump_open(struct kgsl_device *device)
{
- kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE,
- kgsl_mmu_get_ptsize(), SZ_256K);
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+ if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+ kgsl_cffdump_memory_base(device->id,
+ kgsl_mmu_get_base_addr(&device->mmu),
+ kgsl_mmu_get_ptsize(&device->mmu) +
+ KGSL_IOMMU_GLOBAL_MEM_SIZE, adreno_dev->gmem_size);
+ } else {
+ kgsl_cffdump_memory_base(device->id,
+ kgsl_mmu_get_base_addr(&device->mmu),
+ kgsl_mmu_get_ptsize(&device->mmu),
+ adreno_dev->gmem_size);
+ }
}
void kgsl_cffdump_memory_base(enum kgsl_deviceid device_id, unsigned int base,
@@ -387,7 +399,7 @@
}
void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv,
- const struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes,
+ struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes,
bool clean_cache)
{
const void *src;
@@ -522,7 +534,7 @@
}
static struct dentry *create_buf_file_handler(const char *filename,
- struct dentry *parent, int mode, struct rchan_buf *buf,
+ struct dentry *parent, unsigned short mode, struct rchan_buf *buf,
int *is_global)
{
return debugfs_create_file(filename, mode, parent, buf,
diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h
index 2733cc3..d5656f8 100644
--- a/drivers/gpu/msm/kgsl_cffdump.h
+++ b/drivers/gpu/msm/kgsl_cffdump.h
@@ -22,10 +22,10 @@
void kgsl_cffdump_init(void);
void kgsl_cffdump_destroy(void);
-void kgsl_cffdump_open(enum kgsl_deviceid device_id);
+void kgsl_cffdump_open(struct kgsl_device *device);
void kgsl_cffdump_close(enum kgsl_deviceid device_id);
void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv,
- const struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes,
+ struct kgsl_memdesc *memdesc, uint physaddr, uint sizebytes,
bool clean_cache);
void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes);
void kgsl_cffdump_regwrite(enum kgsl_deviceid device_id, uint addr,
@@ -49,7 +49,7 @@
#define kgsl_cffdump_init() (void)0
#define kgsl_cffdump_destroy() (void)0
-#define kgsl_cffdump_open(device_id) (void)0
+#define kgsl_cffdump_open(device) (void)0
#define kgsl_cffdump_close(device_id) (void)0
#define kgsl_cffdump_syncmem(dev_priv, memdesc, addr, sizebytes, clean_cache) \
(void) 0
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index b41bd6b..9dfda32 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -125,6 +125,52 @@
KGSL_DEBUGFS_LOG(pwr_log);
KGSL_DEBUGFS_LOG(ft_log);
+static int memfree_hist_print(struct seq_file *s, void *unused)
+{
+ void *base = kgsl_driver.memfree_hist.base_hist_rb;
+
+ struct kgsl_memfree_hist_elem *wptr = kgsl_driver.memfree_hist.wptr;
+ struct kgsl_memfree_hist_elem *p;
+ char str[16];
+
+ seq_printf(s, "%8s %8s %8s %11s\n",
+ "pid", "gpuaddr", "size", "flags");
+
+ mutex_lock(&kgsl_driver.memfree_hist_mutex);
+ p = wptr;
+ for (;;) {
+ kgsl_get_memory_usage(str, sizeof(str), p->flags);
+ /*
+ * If the ring buffer is not full yet, its unused elements
+ * have size == 0; just skip them. Reading starts at wptr,
+ * the oldest record once the buffer has wrapped.
+ */
+ if (p->size)
+ seq_printf(s, "%8d %08x %8d %11s\n",
+ p->pid, p->gpuaddr, p->size, str);
+ p++;
+ if ((void *)p >= base + kgsl_driver.memfree_hist.size)
+ p = (struct kgsl_memfree_hist_elem *) base;
+
+ if (p == kgsl_driver.memfree_hist.wptr)
+ break;
+ }
+ mutex_unlock(&kgsl_driver.memfree_hist_mutex);
+ return 0;
+}
+
+static int memfree_hist_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, memfree_hist_print, inode->i_private);
+}
+
+static const struct file_operations memfree_hist_fops = {
+ .open = memfree_hist_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
void kgsl_device_debugfs_init(struct kgsl_device *device)
{
if (kgsl_debugfs_dir && !IS_ERR(kgsl_debugfs_dir))
@@ -151,6 +197,8 @@
&mem_log_fops);
debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device,
&pwr_log_fops);
+ debugfs_create_file("memfree_history", 0444, device->d_debugfs, device,
+ &memfree_hist_fops);
debugfs_create_file("log_level_ft", 0644, device->d_debugfs, device,
&ft_log_fops);
@@ -198,35 +246,71 @@
return '-';
}
+static char get_cacheflag(const struct kgsl_memdesc *m)
+{
+ static const char table[] = {
+ [KGSL_CACHEMODE_WRITECOMBINE] = '-',
+ [KGSL_CACHEMODE_UNCACHED] = 'u',
+ [KGSL_CACHEMODE_WRITEBACK] = 'b',
+ [KGSL_CACHEMODE_WRITETHROUGH] = 't',
+ };
+ return table[kgsl_memdesc_get_cachemode(m)];
+}
+
+static void print_mem_entry(struct seq_file *s, struct kgsl_mem_entry *entry)
+{
+ char flags[6];
+ char usage[16];
+ struct kgsl_memdesc *m = &entry->memdesc;
+
+ flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-';
+ flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
+ flags[2] = get_alignflag(m);
+ flags[3] = get_cacheflag(m);
+ flags[4] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-';
+ flags[5] = '\0';
+
+ kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
+
+ seq_printf(s, "%08x %08lx %8d %5d %5s %10s %16s %5d\n",
+ m->gpuaddr, m->useraddr, m->size, entry->id, flags,
+ memtype_str(entry->memtype), usage, m->sglen);
+}
+
static int process_mem_print(struct seq_file *s, void *unused)
{
struct kgsl_mem_entry *entry;
struct rb_node *node;
struct kgsl_process_private *private = s->private;
- char flags[4];
- char usage[16];
+ int next = 0;
+ seq_printf(s, "%8s %8s %8s %5s %5s %10s %16s %5s\n",
+ "gpuaddr", "useraddr", "size", "id", "flags", "type",
+ "usage", "sglen");
+
+ /* print all entries with a GPU address */
spin_lock(&private->mem_lock);
- seq_printf(s, "%8s %8s %5s %10s %16s %5s\n",
- "gpuaddr", "size", "flags", "type", "usage", "sglen");
+
for (node = rb_first(&private->mem_rb); node; node = rb_next(node)) {
- struct kgsl_memdesc *m;
-
entry = rb_entry(node, struct kgsl_mem_entry, node);
- m = &entry->memdesc;
-
- flags[0] = m->priv & KGSL_MEMDESC_GLOBAL ? 'g' : '-';
- flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
- flags[2] = get_alignflag(m);
- flags[3] = '\0';
-
- kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
-
- seq_printf(s, "%08x %8d %5s %10s %16s %5d\n",
- m->gpuaddr, m->size, flags,
- memtype_str(entry->memtype), usage, m->sglen);
+ print_mem_entry(s, entry);
}
+
spin_unlock(&private->mem_lock);
+
+ /* now print all the unbound entries */
+ while (1) {
+ rcu_read_lock();
+ entry = idr_get_next(&private->mem_idr, &next);
+ rcu_read_unlock();
+
+ if (entry == NULL)
+ break;
+ if (entry->memdesc.gpuaddr == 0)
+ print_mem_entry(s, entry);
+ next++;
+ }
+
return 0;
}
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index b215d8c..0d11660 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -73,7 +73,8 @@
int (*idle) (struct kgsl_device *device);
unsigned int (*isidle) (struct kgsl_device *device);
int (*suspend_context) (struct kgsl_device *device);
- int (*start) (struct kgsl_device *device, unsigned int init_ram);
+ int (*init) (struct kgsl_device *device);
+ int (*start) (struct kgsl_device *device);
int (*stop) (struct kgsl_device *device);
int (*getproperty) (struct kgsl_device *device,
enum kgsl_property_type type, void *value,
@@ -105,7 +106,7 @@
uint32_t flags);
int (*drawctxt_create) (struct kgsl_device *device,
struct kgsl_pagetable *pagetable, struct kgsl_context *context,
- uint32_t flags);
+ uint32_t *flags);
void (*drawctxt_destroy) (struct kgsl_device *device,
struct kgsl_context *context);
long (*ioctl) (struct kgsl_device_private *dev_priv,
@@ -145,11 +146,27 @@
unsigned int ver_minor;
uint32_t flags;
enum kgsl_deviceid id;
+
+ /* Starting physical address for GPU registers */
unsigned long reg_phys;
+
+ /* Starting Kernel virtual address for GPU registers */
void *reg_virt;
+
+ /* Total memory size for all GPU registers */
unsigned int reg_len;
+
+ /* Kernel virtual address for GPU shader memory */
+ void *shader_mem_virt;
+
+ /* Starting physical address for GPU shader memory */
+ unsigned long shader_mem_phys;
+
+ /* GPU shader memory size */
+ unsigned int shader_mem_len;
struct kgsl_memdesc memstore;
const char *iomemname;
+ const char *shadermemname;
struct kgsl_mh mh;
struct kgsl_mmu mmu;
@@ -160,7 +177,6 @@
struct kgsl_pwrctrl pwrctrl;
int open_count;
- struct atomic_notifier_head ts_notifier_list;
struct mutex mutex;
uint32_t state;
uint32_t requested_state;
@@ -201,7 +217,6 @@
int pm_dump_enable;
struct kgsl_pwrscale pwrscale;
struct kobject pwrscale_kobj;
- struct pm_qos_request pm_qos_req_dma;
struct work_struct ts_expired_ws;
struct list_head events;
struct list_head events_pending_list;
@@ -210,16 +225,16 @@
/* Postmortem Control switches */
int pm_regs_enabled;
int pm_ib_enabled;
+
+ int reset_counter; /* Track how many GPU core resets have occurred */
};
void kgsl_process_events(struct work_struct *work);
-void kgsl_check_fences(struct work_struct *work);
#define KGSL_DEVICE_COMMON_INIT(_dev) \
.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
.suspend_gate = COMPLETION_INITIALIZER((_dev).suspend_gate),\
.ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\
- .ts_notifier_list = ATOMIC_NOTIFIER_INIT((_dev).ts_notifier_list),\
.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
kgsl_idle_check),\
.ts_expired_ws = __WORK_INITIALIZER((_dev).ts_expired_ws,\
@@ -266,6 +281,7 @@
pid_t pid;
spinlock_t mem_lock;
struct rb_root mem_rb;
+ struct idr mem_idr;
struct kgsl_pagetable *pagetable;
struct list_head list;
struct kobject kobj;
@@ -391,12 +407,6 @@
int kgsl_check_timestamp(struct kgsl_device *device,
struct kgsl_context *context, unsigned int timestamp);
-int kgsl_register_ts_notifier(struct kgsl_device *device,
- struct notifier_block *nb);
-
-int kgsl_unregister_ts_notifier(struct kgsl_device *device,
- struct notifier_block *nb);
-
int kgsl_device_platform_probe(struct kgsl_device *device);
void kgsl_device_platform_remove(struct kgsl_device *device);
diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c
index 2a5a5fa..11d6ffa 100644
--- a/drivers/gpu/msm/kgsl_drm.c
+++ b/drivers/gpu/msm/kgsl_drm.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2009-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2009-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -16,7 +16,8 @@
*/
#include "drmP.h"
#include "drm.h"
-#include <linux/android_pmem.h>
+
+#include <linux/msm_ion.h>
#include "kgsl.h"
#include "kgsl_device.h"
@@ -27,7 +28,7 @@
#define DRIVER_AUTHOR "Qualcomm"
#define DRIVER_NAME "kgsl"
#define DRIVER_DESC "KGSL DRM"
-#define DRIVER_DATE "20100127"
+#define DRIVER_DATE "20121107"
#define DRIVER_MAJOR 2
#define DRIVER_MINOR 1
@@ -106,6 +107,7 @@
uint32_t type;
struct kgsl_memdesc memdesc;
struct kgsl_pagetable *pagetable;
+ struct ion_handle *ion_handle;
uint64_t mmap_offset;
int bufcount;
int flags;
@@ -129,86 +131,18 @@
struct list_head wait_list;
};
+static struct ion_client *kgsl_drm_ion_client;
+
static int kgsl_drm_inited = DRM_KGSL_NOT_INITED;
/* This is a global list of all the memory currently mapped in the MMU */
static struct list_head kgsl_mem_list;
-static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op)
-{
- int cacheop = 0;
-
- switch (op) {
- case DRM_KGSL_GEM_CACHE_OP_TO_DEV:
- if (type & (DRM_KGSL_GEM_CACHE_WBACK |
- DRM_KGSL_GEM_CACHE_WBACKWA))
- cacheop = KGSL_CACHE_OP_CLEAN;
-
- break;
-
- case DRM_KGSL_GEM_CACHE_OP_FROM_DEV:
- if (type & (DRM_KGSL_GEM_CACHE_WBACK |
- DRM_KGSL_GEM_CACHE_WBACKWA |
- DRM_KGSL_GEM_CACHE_WTHROUGH))
- cacheop = KGSL_CACHE_OP_INV;
- }
-
- kgsl_cache_range_op(memdesc, cacheop);
-}
-
-/* TODO:
- * Add vsync wait */
-
-static int kgsl_drm_load(struct drm_device *dev, unsigned long flags)
-{
- return 0;
-}
-
-static int kgsl_drm_unload(struct drm_device *dev)
-{
- return 0;
-}
-
struct kgsl_drm_device_priv {
struct kgsl_device *device[KGSL_DEVICE_MAX];
struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX];
};
-void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
-{
-}
-
-static int kgsl_drm_suspend(struct drm_device *dev, pm_message_t state)
-{
- return 0;
-}
-
-static int kgsl_drm_resume(struct drm_device *dev)
-{
- return 0;
-}
-
-static void
-kgsl_gem_free_mmap_offset(struct drm_gem_object *obj)
-{
- struct drm_device *dev = obj->dev;
- struct drm_gem_mm *mm = dev->mm_private;
- struct drm_kgsl_gem_object *priv = obj->driver_private;
- struct drm_map_list *list;
-
- list = &obj->map_list;
- drm_ht_remove_item(&mm->offset_hash, &list->hash);
- if (list->file_offset_node) {
- drm_mm_put_block(list->file_offset_node);
- list->file_offset_node = NULL;
- }
-
- kfree(list->map);
- list->map = NULL;
-
- priv->mmap_offset = 0;
-}
-
static int
kgsl_gem_memory_allocated(struct drm_gem_object *obj)
{
@@ -220,6 +154,8 @@
kgsl_gem_alloc_memory(struct drm_gem_object *obj)
{
struct drm_kgsl_gem_object *priv = obj->driver_private;
+ struct sg_table *sg_table;
+ struct scatterlist *s;
int index;
int result = 0;
@@ -237,21 +173,52 @@
}
}
- /* Set the flags for the memdesc (probably 0, unless it is cached) */
- priv->memdesc.priv = 0;
-
if (TYPE_IS_PMEM(priv->type)) {
if (priv->type == DRM_KGSL_GEM_TYPE_EBI ||
priv->type & DRM_KGSL_GEM_PMEM_EBI) {
- result = kgsl_sharedmem_ebimem_user(
- &priv->memdesc,
- priv->pagetable,
- obj->size * priv->bufcount);
- if (result) {
- DRM_ERROR(
- "Unable to allocate PMEM memory\n");
- return result;
- }
+ priv->ion_handle = ion_alloc(kgsl_drm_ion_client,
+ obj->size * priv->bufcount, PAGE_SIZE,
+ ION_HEAP(ION_SF_HEAP_ID), 0);
+ if (IS_ERR_OR_NULL(priv->ion_handle)) {
+ DRM_ERROR(
+ "Unable to allocate ION Phys memory handle\n");
+ return -ENOMEM;
+ }
+
+ priv->memdesc.pagetable = priv->pagetable;
+
+ result = ion_phys(kgsl_drm_ion_client,
+ priv->ion_handle, (ion_phys_addr_t *)
+ &priv->memdesc.physaddr, &priv->memdesc.size);
+ if (result) {
+ DRM_ERROR(
+ "Unable to get ION Physical memory address\n");
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ return result;
+ }
+
+ result = memdesc_sg_phys(&priv->memdesc,
+ priv->memdesc.physaddr, priv->memdesc.size);
+ if (result) {
+ DRM_ERROR(
+ "Unable to get sg list\n");
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ return result;
+ }
+
+ result = kgsl_mmu_map(priv->pagetable, &priv->memdesc);
+ if (result) {
+ DRM_ERROR(
+ "kgsl_mmu_map failed. result = %d\n", result);
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ return result;
+ }
}
else
return -EINVAL;
@@ -262,15 +229,44 @@
priv->type & DRM_KGSL_GEM_CACHE_MASK)
list_add(&priv->list, &kgsl_mem_list);
- result = kgsl_sharedmem_page_alloc_user(&priv->memdesc,
- priv->pagetable,
- obj->size * priv->bufcount);
+ priv->memdesc.pagetable = priv->pagetable;
- if (result != 0) {
- DRM_ERROR(
- "Unable to allocate Vmalloc user memory\n");
- return result;
+ priv->ion_handle = ion_alloc(kgsl_drm_ion_client,
+ obj->size * priv->bufcount, PAGE_SIZE,
+ ION_HEAP(ION_IOMMU_HEAP_ID), 0);
+ if (IS_ERR_OR_NULL(priv->ion_handle)) {
+ DRM_ERROR(
+ "Unable to allocate ION IOMMU memory handle\n");
+ return -ENOMEM;
}
+
+ sg_table = ion_sg_table(kgsl_drm_ion_client,
+ priv->ion_handle);
+ if (IS_ERR_OR_NULL(sg_table)) {
+ DRM_ERROR(
+ "Unable to get ION sg table\n");
+ goto memerr;
+ }
+
+ priv->memdesc.sg = sg_table->sgl;
+
+ /* Calculate the size of the memdesc from the sglist */
+
+ priv->memdesc.sglen = 0;
+
+ for (s = priv->memdesc.sg; s != NULL; s = sg_next(s)) {
+ priv->memdesc.size += s->length;
+ priv->memdesc.sglen++;
+ }
+
+ result = kgsl_mmu_map(priv->pagetable, &priv->memdesc,
+ GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ if (result) {
+ DRM_ERROR(
+ "kgsl_mmu_map failed. result = %d\n", result);
+ goto memerr;
+ }
+
} else
return -EINVAL;
@@ -282,7 +278,15 @@
}
priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED;
+
return 0;
+
+memerr:
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ return -ENOMEM;
+
}
static void
@@ -293,10 +297,19 @@
if (!kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type))
return;
- kgsl_gem_mem_flush(&priv->memdesc, priv->type,
- DRM_KGSL_GEM_CACHE_OP_FROM_DEV);
+ if (priv->memdesc.gpuaddr)
+ kgsl_mmu_unmap(priv->memdesc.pagetable, &priv->memdesc);
- kgsl_sharedmem_free(&priv->memdesc);
+ /* ION will take care of freeing the sg table. */
+ priv->memdesc.sg = NULL;
+ priv->memdesc.sglen = 0;
+
+ if (priv->ion_handle)
+ ion_free(kgsl_drm_ion_client, priv->ion_handle);
+
+ priv->ion_handle = NULL;
+
+ memset(&priv->memdesc, 0, sizeof(priv->memdesc));
kgsl_mmu_putpagetable(priv->pagetable);
priv->pagetable = NULL;
@@ -329,66 +342,10 @@
kgsl_gem_free_object(struct drm_gem_object *obj)
{
kgsl_gem_free_memory(obj);
- kgsl_gem_free_mmap_offset(obj);
drm_gem_object_release(obj);
kfree(obj->driver_private);
}
-static int
-kgsl_gem_create_mmap_offset(struct drm_gem_object *obj)
-{
- struct drm_device *dev = obj->dev;
- struct drm_gem_mm *mm = dev->mm_private;
- struct drm_kgsl_gem_object *priv = obj->driver_private;
- struct drm_map_list *list;
- int msize;
-
- list = &obj->map_list;
- list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
- if (list->map == NULL) {
- DRM_ERROR("Unable to allocate drm_map_list\n");
- return -ENOMEM;
- }
-
- msize = obj->size * priv->bufcount;
-
- list->map->type = _DRM_GEM;
- list->map->size = msize;
- list->map->handle = obj;
-
- /* Allocate a mmap offset */
- list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
- msize / PAGE_SIZE,
- 0, 0);
-
- if (!list->file_offset_node) {
- DRM_ERROR("Failed to allocate offset for %d\n", obj->name);
- kfree(list->map);
- return -ENOMEM;
- }
-
- list->file_offset_node = drm_mm_get_block(list->file_offset_node,
- msize / PAGE_SIZE, 0);
-
- if (!list->file_offset_node) {
- DRM_ERROR("Unable to create the file_offset_node\n");
- kfree(list->map);
- return -ENOMEM;
- }
-
- list->hash.key = list->file_offset_node->start;
- if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
- DRM_ERROR("Failed to add to map hash\n");
- drm_mm_put_block(list->file_offset_node);
- kfree(list->map);
- return -ENOMEM;
- }
-
- priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
-
- return 0;
-}
-
int
kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
unsigned long *len)
@@ -435,9 +392,6 @@
priv->bufs[priv->active].offset;
*len = priv->memdesc.size;
-
- kgsl_gem_mem_flush(&priv->memdesc,
- priv->type, DRM_KGSL_GEM_CACHE_OP_TO_DEV);
} else {
*start = 0;
*len = 0;
@@ -468,10 +422,7 @@
priv->active = 0;
priv->bound = 0;
- /* To preserve backwards compatability, the default memory source
- is EBI */
-
- priv->type = DRM_KGSL_GEM_TYPE_PMEM | DRM_KGSL_GEM_PMEM_EBI;
+ priv->type = DRM_KGSL_GEM_TYPE_KMEM;
ret = drm_gem_handle_create(file_priv, obj, handle);
@@ -513,8 +464,11 @@
}
ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
- if (ret)
+ if (ret) {
+ drm_gem_object_release(obj);
+ DRM_ERROR("Unable to initialize GEM object ret = %d\n", ret);
return ret;
+ }
create->handle = handle;
return 0;
@@ -587,6 +541,149 @@
}
int
+kgsl_gem_create_from_ion_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_create_from_ion *args = data;
+ struct drm_gem_object *obj;
+ struct ion_handle *ion_handle;
+ struct drm_kgsl_gem_object *priv;
+ struct sg_table *sg_table;
+ struct scatterlist *s;
+ int ret, handle;
+ unsigned long size;
+
+ ion_handle = ion_import_dma_buf(kgsl_drm_ion_client, args->ion_fd);
+ if (IS_ERR_OR_NULL(ion_handle)) {
+ DRM_ERROR("Unable to import dmabuf. Error number = %d\n",
+ (int)PTR_ERR(ion_handle));
+ return -EINVAL;
+ }
+
+ ion_handle_get_size(kgsl_drm_ion_client, ion_handle, &size);
+
+ if (size == 0) {
+ ion_free(kgsl_drm_ion_client, ion_handle);
+ DRM_ERROR(
+ "cannot create GEM object from zero size ION buffer\n");
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_alloc(dev, size);
+
+ if (obj == NULL) {
+ ion_free(kgsl_drm_ion_client, ion_handle);
+ DRM_ERROR("Unable to allocate the GEM object\n");
+ return -ENOMEM;
+ }
+
+ ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
+ if (ret) {
+ ion_free(kgsl_drm_ion_client, ion_handle);
+ drm_gem_object_release(obj);
+ DRM_ERROR("Unable to initialize GEM object ret = %d\n", ret);
+ return ret;
+ }
+
+ priv = obj->driver_private;
+ priv->ion_handle = ion_handle;
+
+ priv->type = DRM_KGSL_GEM_TYPE_KMEM;
+ list_add(&priv->list, &kgsl_mem_list);
+
+ priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
+
+ priv->memdesc.pagetable = priv->pagetable;
+
+ sg_table = ion_sg_table(kgsl_drm_ion_client,
+ priv->ion_handle);
+ if (IS_ERR_OR_NULL(sg_table)) {
+ DRM_ERROR("Unable to get ION sg table\n");
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ kgsl_mmu_putpagetable(priv->pagetable);
+ drm_gem_object_release(obj);
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ priv->memdesc.sg = sg_table->sgl;
+
+ /* Calculate the size of the memdesc from the sglist */
+
+ priv->memdesc.sglen = 0;
+
+ for (s = priv->memdesc.sg; s != NULL; s = sg_next(s)) {
+ priv->memdesc.size += s->length;
+ priv->memdesc.sglen++;
+ }
+
+ ret = kgsl_mmu_map(priv->pagetable, &priv->memdesc,
+ GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ if (ret) {
+ DRM_ERROR("kgsl_mmu_map failed. ret = %d\n", ret);
+ ion_free(kgsl_drm_ion_client,
+ priv->ion_handle);
+ priv->ion_handle = NULL;
+ kgsl_mmu_putpagetable(priv->pagetable);
+ drm_gem_object_release(obj);
+ kfree(priv);
+ return -ENOMEM;
+ }
+
+ priv->bufs[0].offset = 0;
+ priv->bufs[0].gpuaddr = priv->memdesc.gpuaddr;
+ priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED;
+
+ args->handle = handle;
+ return 0;
+}
+
+int
+kgsl_gem_get_ion_fd_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_get_ion_fd *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = 0;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ if (TYPE_IS_FD(priv->type))
+ ret = -EINVAL;
+ else if (TYPE_IS_PMEM(priv->type) || TYPE_IS_MEM(priv->type)) {
+ if (priv->ion_handle) {
+ args->ion_fd = ion_share_dma_buf(
+ kgsl_drm_ion_client, priv->ion_handle);
+ if (args->ion_fd < 0) {
+ DRM_ERROR(
+ "Could not share ion buffer. Error = %d\n",
+ args->ion_fd);
+ ret = -EINVAL;
+ }
+ } else {
+ DRM_ERROR("GEM object has no ion memory allocated.\n");
+ ret = -EINVAL;
+ }
+ }
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+int
kgsl_gem_setmemtype_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
@@ -685,13 +782,9 @@
if (ret) {
DRM_ERROR("Unable to allocate object memory\n");
- } else if (!priv->mmap_offset) {
- ret = kgsl_gem_create_mmap_offset(obj);
- if (ret)
- DRM_ERROR("Unable to create a mmap offset\n");
}
- args->offset = priv->mmap_offset;
+ args->offset = 0;
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
@@ -703,33 +796,7 @@
kgsl_gem_mmap_ioctl(struct drm_device *dev, void *data,
struct drm_file *file_priv)
{
- struct drm_kgsl_gem_mmap *args = data;
- struct drm_gem_object *obj;
- unsigned long addr;
-
- obj = drm_gem_object_lookup(dev, file_priv, args->handle);
-
- if (obj == NULL) {
- DRM_ERROR("Invalid GEM handle %x\n", args->handle);
- return -EBADF;
- }
-
- down_write(&current->mm->mmap_sem);
-
- addr = do_mmap(obj->filp, 0, args->size,
- PROT_READ | PROT_WRITE, MAP_SHARED,
- args->offset);
-
- up_write(&current->mm->mmap_sem);
-
- mutex_lock(&dev->struct_mutex);
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
-
- if (IS_ERR((void *) addr))
- return addr;
-
- args->hostptr = (uint32_t) addr;
+ /* Ion is used for mmap at this time */
return 0;
}
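
With the GEM mmap ioctl reduced to a stub, userspace is expected to export the buffer's dma-buf fd (via the KGSL_GEM_GET_ION_FD ioctl added in this patch) and mmap() that fd instead. A hypothetical userspace sketch — the ioctl request macro and the struct layout are assumed from the kernel-side code above, not taken from a released UAPI header:

#include <stddef.h>
#include <stdint.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Assumed UAPI mirror of the kernel-side args struct in this patch */
struct drm_kgsl_gem_get_ion_fd {
	uint32_t handle;  /* GEM handle (in) */
	int32_t ion_fd;   /* dma-buf fd (out) */
};

/* DRM_IOCTL_KGSL_GEM_GET_ION_FD is assumed to come from the kgsl_drm
 * UAPI header; it is not defined in this patch excerpt. */
static void *map_gem_buffer(int drm_fd, uint32_t handle, size_t size)
{
	struct drm_kgsl_gem_get_ion_fd args = { .handle = handle, .ion_fd = -1 };

	if (ioctl(drm_fd, DRM_IOCTL_KGSL_GEM_GET_ION_FD, &args) < 0)
		return MAP_FAILED;

	/* mmap the exported dma-buf; KGSL_GEM_MMAP no longer maps anything */
	return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
		    args.ion_fd, 0);
}
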
@@ -762,18 +829,6 @@
return ret;
}
- if (priv->mmap_offset == 0) {
- ret = kgsl_gem_create_mmap_offset(obj);
- if (ret) {
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- return ret;
- }
- }
-
- args->offset = priv->mmap_offset;
- args->phys = priv->memdesc.physaddr;
-
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
@@ -957,122 +1012,6 @@
}
}
-static struct vm_operations_struct kgsl_gem_kmem_vm_ops = {
- .fault = kgsl_gem_kmem_fault,
- .open = drm_gem_vm_open,
- .close = drm_gem_vm_close,
-};
-
-static struct vm_operations_struct kgsl_gem_phys_vm_ops = {
- .fault = kgsl_gem_phys_fault,
- .open = drm_gem_vm_open,
- .close = drm_gem_vm_close,
-};
-
-/* This is a clone of the standard drm_gem_mmap function modified to allow
- us to properly map KMEM regions as well as the PMEM regions */
-
-int msm_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
-{
- struct drm_file *priv = filp->private_data;
- struct drm_device *dev = priv->minor->dev;
- struct drm_gem_mm *mm = dev->mm_private;
- struct drm_local_map *map = NULL;
- struct drm_gem_object *obj;
- struct drm_hash_item *hash;
- struct drm_kgsl_gem_object *gpriv;
- int ret = 0;
-
- mutex_lock(&dev->struct_mutex);
-
- if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
- mutex_unlock(&dev->struct_mutex);
- return drm_mmap(filp, vma);
- }
-
- map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
- if (!map ||
- ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
- ret = -EPERM;
- goto out_unlock;
- }
-
- /* Check for valid size. */
- if (map->size < vma->vm_end - vma->vm_start) {
- ret = -EINVAL;
- goto out_unlock;
- }
-
- obj = map->handle;
-
- gpriv = obj->driver_private;
-
- /* VM_PFNMAP is only for memory that doesn't use struct page
- * in other words, not "normal" memory. If you try to use it
- * with "normal" memory then the mappings don't get flushed. */
-
- if (TYPE_IS_MEM(gpriv->type)) {
- vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
- vma->vm_ops = &kgsl_gem_kmem_vm_ops;
- } else {
- vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP |
- VM_DONTEXPAND;
- vma->vm_ops = &kgsl_gem_phys_vm_ops;
- }
-
- vma->vm_private_data = map->handle;
-
-
- /* Take care of requested caching policy */
- if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM ||
- gpriv->type & DRM_KGSL_GEM_CACHE_MASK) {
- if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACKWA)
- vma->vm_page_prot =
- pgprot_writebackwacache(vma->vm_page_prot);
- else if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACK)
- vma->vm_page_prot =
- pgprot_writebackcache(vma->vm_page_prot);
- else if (gpriv->type & DRM_KGSL_GEM_CACHE_WTHROUGH)
- vma->vm_page_prot =
- pgprot_writethroughcache(vma->vm_page_prot);
- else
- vma->vm_page_prot =
- pgprot_writecombine(vma->vm_page_prot);
- } else {
- if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM_NOCACHE)
- vma->vm_page_prot =
- pgprot_noncached(vma->vm_page_prot);
- else
- /* default pmem is WC */
- vma->vm_page_prot =
- pgprot_writecombine(vma->vm_page_prot);
- }
-
- /* flush out existing KMEM cached mappings if new ones are
- * of uncached type */
- if (IS_MEM_UNCACHED(gpriv->type))
- kgsl_cache_range_op(&gpriv->memdesc,
- KGSL_CACHE_OP_FLUSH);
-
- /* Add the other memory types here */
-
- /* Take a ref for this mapping of the object, so that the fault
- * handler can dereference the mmap offset's pointer to the object.
- * This reference is cleaned up by the corresponding vm_close
- * (which should happen whether the vma was created by this call, or
- * by a vm_open due to mremap or partial unmap or whatever).
- */
- drm_gem_object_reference(obj);
-
- vma->vm_file = filp; /* Needed for drm_vm_open() */
- drm_vm_open_locked(vma);
-
-out_unlock:
- mutex_unlock(&dev->struct_mutex);
-
- return ret;
-}
-
void
cleanup_fence(struct drm_kgsl_gem_object_fence *fence, int check_waiting)
{
@@ -1434,6 +1373,9 @@
DRM_IOCTL_DEF_DRV(KGSL_GEM_ALLOC, kgsl_gem_alloc_ioctl, 0),
DRM_IOCTL_DEF_DRV(KGSL_GEM_MMAP, kgsl_gem_mmap_ioctl, 0),
DRM_IOCTL_DEF_DRV(KGSL_GEM_GET_BUFINFO, kgsl_gem_get_bufinfo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_GET_ION_FD, kgsl_gem_get_ion_fd_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_CREATE_FROM_ION,
+ kgsl_gem_create_from_ion_ioctl, 0),
DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_BUFCOUNT,
kgsl_gem_set_bufcount_ioctl, 0),
DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_ACTIVE, kgsl_gem_set_active_ioctl, 0),
@@ -1447,28 +1389,22 @@
DRM_MASTER),
};
+static const struct file_operations kgsl_drm_driver_fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = drm_gem_mmap,
+ .poll = drm_poll,
+ .fasync = drm_fasync,
+};
+
static struct drm_driver driver = {
.driver_features = DRIVER_GEM,
- .load = kgsl_drm_load,
- .unload = kgsl_drm_unload,
- .preclose = kgsl_drm_preclose,
- .suspend = kgsl_drm_suspend,
- .resume = kgsl_drm_resume,
- .reclaim_buffers = drm_core_reclaim_buffers,
.gem_init_object = kgsl_gem_init_object,
.gem_free_object = kgsl_gem_free_object,
.ioctls = kgsl_drm_ioctls,
-
- .fops = {
- .owner = THIS_MODULE,
- .open = drm_open,
- .release = drm_release,
- .unlocked_ioctl = drm_ioctl,
- .mmap = msm_drm_gem_mmap,
- .poll = drm_poll,
- .fasync = drm_fasync,
- },
-
+ .fops = &kgsl_drm_driver_fops,
.name = DRIVER_NAME,
.desc = DRIVER_DESC,
.date = DRIVER_DATE,
@@ -1497,11 +1433,24 @@
gem_buf_fence[i].fence_id = ENTRY_EMPTY;
}
+ /* Create ION Client */
+ kgsl_drm_ion_client = msm_ion_client_create(
+ 0xffffffff, "kgsl_drm");
+ if (!kgsl_drm_ion_client) {
+ DRM_ERROR("Unable to create ION client\n");
+ return -ENOMEM;
+ }
+
return drm_platform_init(&driver, dev);
}
void kgsl_drm_exit(void)
{
kgsl_drm_inited = DRM_KGSL_NOT_INITED;
+
+ if (kgsl_drm_ion_client)
+ ion_client_destroy(kgsl_drm_ion_client);
+ kgsl_drm_ion_client = NULL;
+
drm_platform_exit(&driver, driver.kdriver.platform_device);
}
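
The ION conversion repeats one idiom twice — in kgsl_gem_alloc_memory() and kgsl_gem_create_from_ion_ioctl() — sizing the memdesc by walking the scatterlist returned by ion_sg_table(). A minimal sketch of that accounting loop, with hypothetical parameter names standing in for the memdesc fields:

#include <linux/scatterlist.h>

/* Sketch: sum the segment lengths to get the total buffer size and count
 * the entries, as the two loops in this patch populate memdesc.size and
 * memdesc.sglen. */
static void sglist_accumulate(struct scatterlist *sgl,
			      unsigned int *size, unsigned int *sglen)
{
	struct scatterlist *s;

	*size = 0;
	*sglen = 0;
	for (s = sgl; s != NULL; s = sg_next(s)) {
		*size += s->length;
		(*sglen)++;
	}
}
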
diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c
index 6798eed..9e9c0da 100644
--- a/drivers/gpu/msm/kgsl_events.c
+++ b/drivers/gpu/msm/kgsl_events.c
@@ -149,6 +149,7 @@
* Send the current timestamp so the event knows how far the
* system got before the event was canceled
*/
+ list_del(&event->list);
trace_kgsl_fire_event(id, cur, jiffies - event->created);
@@ -156,7 +157,6 @@
event->func(device, event->priv, id, cur);
kgsl_context_put(context);
- list_del(&event->list);
kfree(event);
kgsl_active_count_put(device);
@@ -192,6 +192,7 @@
* the callback knows how far the GPU made it before things went
* wrong
*/
+ list_del(&event->list);
trace_kgsl_fire_event(KGSL_MEMSTORE_GLOBAL, cur,
jiffies - event->created);
@@ -202,8 +203,6 @@
if (event->context)
kgsl_context_put(event->context);
-
- list_del(&event->list);
kfree(event);
kgsl_active_count_put(device);
@@ -229,6 +228,7 @@
* confused if they don't bother comparing the current timestamp
* to the timestamp they wanted
*/
+ list_del(&event->list);
trace_kgsl_fire_event(id, event->timestamp,
jiffies - event->created);
@@ -238,8 +238,6 @@
if (event->context)
kgsl_context_put(event->context);
-
- list_del(&event->list);
kfree(event);
kgsl_active_count_put(device);
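
All three kgsl_events.c hunks make the same change: the event is unlinked from its list before its callback runs, not after. A sketch of the pattern with illustrative names — once the node is off the list, a callback that re-enters the event code and walks the list can no longer observe an entry that is about to be freed:

#include <linux/list.h>
#include <linux/slab.h>

struct example_event {
	struct list_head list;
	void (*func)(struct example_event *ev);
};

static void fire_event(struct example_event *ev)
{
	list_del(&ev->list);  /* unlink first ... */
	ev->func(ev);         /* ... then run the callback */
	kfree(ev);            /* safe: no list walker can reach the node */
}
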
diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c
index 8f28505..5cc0dff 100644
--- a/drivers/gpu/msm/kgsl_gpummu.c
+++ b/drivers/gpu/msm/kgsl_gpummu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,9 +19,11 @@
#include "kgsl.h"
#include "kgsl_mmu.h"
+#include "kgsl_gpummu.h"
#include "kgsl_device.h"
#include "kgsl_sharedmem.h"
#include "kgsl_trace.h"
+#include "adreno.h"
#define KGSL_PAGETABLE_SIZE \
ALIGN(KGSL_PAGETABLE_ENTRIES(CONFIG_MSM_KGSL_PAGE_TABLE_SIZE) * \
@@ -161,7 +163,7 @@
}
static void *
-_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, unsigned int *physaddr)
+_kgsl_ptpool_get_entry(struct kgsl_ptpool *pool, phys_addr_t *physaddr)
{
struct kgsl_ptpool_chunk *chunk;
@@ -227,7 +229,7 @@
*/
static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool,
- unsigned int *physaddr)
+ phys_addr_t *physaddr)
{
void *addr = NULL;
int ret;
@@ -363,10 +365,9 @@
return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base);
}
-void kgsl_gpummu_destroy_pagetable(void *mmu_specific_pt)
+void kgsl_gpummu_destroy_pagetable(struct kgsl_pagetable *pt)
{
- struct kgsl_gpummu_pt *gpummu_pt = (struct kgsl_gpummu_pt *)
- mmu_specific_pt;
+ struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
kgsl_ptpool_free((struct kgsl_ptpool *)kgsl_driver.ptpool,
gpummu_pt->base.hostptr);
@@ -403,11 +404,22 @@
{
unsigned int reg;
unsigned int ptbase;
+ struct kgsl_device *device;
+ struct adreno_device *adreno_dev;
+ unsigned int no_page_fault_log = 0;
- kgsl_regread(mmu->device, MH_MMU_PAGE_FAULT, &reg);
- kgsl_regread(mmu->device, MH_MMU_PT_BASE, &ptbase);
+ device = mmu->device;
+ adreno_dev = ADRENO_DEVICE(device);
- KGSL_MEM_CRIT(mmu->device,
+ kgsl_regread(device, MH_MMU_PAGE_FAULT, &reg);
+ kgsl_regread(device, MH_MMU_PT_BASE, &ptbase);
+
+
+ if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
+ no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, reg);
+
+ if (!no_page_fault_log)
+ KGSL_MEM_CRIT(mmu->device,
"mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n",
reg & ~(PAGE_SIZE - 1),
kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
@@ -516,6 +528,11 @@
*/
int status = 0;
+ mmu->pt_base = KGSL_PAGETABLE_BASE;
+ mmu->pt_size = CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
+ mmu->pt_per_process = KGSL_MMU_USE_PER_PROCESS_PT;
+ mmu->use_cpu_map = false;
+
/* sub-client MMU lookups require address translation */
if ((mmu->config & ~0x1) > 0) {
/*make sure virtual address range is a multiple of 64Kb */
@@ -572,7 +589,7 @@
if (mmu->defaultpagetable == NULL)
mmu->defaultpagetable =
- kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
+ kgsl_mmu_getpagetable(mmu, KGSL_MMU_GLOBAL_PT);
/* Return error if the default pagetable doesn't exist */
if (mmu->defaultpagetable == NULL)
@@ -592,14 +609,14 @@
}
static int
-kgsl_gpummu_unmap(void *mmu_specific_pt,
+kgsl_gpummu_unmap(struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int *tlb_flags)
{
unsigned int numpages;
unsigned int pte, ptefirst, ptelast, superpte;
unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
- struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt;
+ struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
/* All GPU addresses as assigned are page aligned, but some
functions perturb the gpuaddr with an offset, so apply the
@@ -641,13 +658,13 @@
GSL_TLBFLUSH_FILTER_ISDIRTY((_p) / GSL_PT_SUPER_PTE))
static int
-kgsl_gpummu_map(void *mmu_specific_pt,
+kgsl_gpummu_map(struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int protflags,
unsigned int *tlb_flags)
{
unsigned int pte;
- struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt;
+ struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
struct scatterlist *s;
int flushtlb = 0;
int i;
diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h
index 99e7d5f..1753aff 100644
--- a/drivers/gpu/msm/kgsl_gpummu.h
+++ b/drivers/gpu/msm/kgsl_gpummu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -57,7 +57,7 @@
int dynamic;
void *data;
- unsigned int phys;
+ phys_addr_t phys;
unsigned long *bitmap;
struct list_head list;
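
Widening phys from unsigned int to phys_addr_t keeps the pagetable pool correct on LPAE configurations, where physical addresses can exceed 32 bits even on a 32-bit CPU. A small sketch of the matching printk convention — %pa takes a pointer to a phys_addr_t and formats it at the right width for the build:

#include <linux/types.h>
#include <linux/printk.h>

static void show_chunk_phys(phys_addr_t phys)
{
	/* %pa dereferences the pointer and prints 32 or 64 bits as built */
	pr_info("ptpool chunk at %pa\n", &phys);
}
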
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index f2393e4..739fcff 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -11,6 +11,7 @@
*
*/
#include <linux/types.h>
+#include <linux/delay.h>
#include <linux/device.h>
#include <linux/spinlock.h>
#include <linux/genalloc.h>
@@ -20,6 +21,7 @@
#include <mach/socinfo.h>
#include <mach/msm_iomap.h>
#include <mach/board.h>
+#include <mach/iommu_domains.h>
#include <stddef.h>
#include "kgsl.h"
@@ -31,24 +33,33 @@
#include "adreno.h"
#include "kgsl_trace.h"
#include "z180.h"
+#include "kgsl_cffdump.h"
-static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
+static struct kgsl_iommu_register_list kgsl_iommuv0_reg[KGSL_IOMMU_REG_MAX] = {
{ 0, 0, 0 }, /* GLOBAL_BASE */
{ 0x10, 0x0003FFFF, 14 }, /* TTBR0 */
{ 0x14, 0x0003FFFF, 14 }, /* TTBR1 */
{ 0x20, 0, 0 }, /* FSR */
{ 0x800, 0, 0 }, /* TLBIALL */
{ 0x820, 0, 0 }, /* RESUME */
+ { 0x03C, 0, 0 }, /* TLBLKCR */
+ { 0x818, 0, 0 }, /* V2PUR */
+ { 0x2C, 0, 0 }, /* FSYNR0 */
+ { 0x2C, 0, 0 }, /* FSYNR0 */
};
-static struct kgsl_iommu_register_list kgsl_iommuv2_reg[KGSL_IOMMU_REG_MAX] = {
+static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
{ 0, 0, 0 }, /* GLOBAL_BASE */
{ 0x20, 0x00FFFFFF, 14 }, /* TTBR0 */
{ 0x28, 0x00FFFFFF, 14 }, /* TTBR1 */
{ 0x58, 0, 0 }, /* FSR */
{ 0x618, 0, 0 }, /* TLBIALL */
- { 0x008, 0, 0 } /* RESUME */
+ { 0x008, 0, 0 }, /* RESUME */
+ { 0, 0, 0 }, /* TLBLKCR */
+ { 0, 0, 0 }, /* V2PUR */
+ { 0x68, 0, 0 }, /* FSYNR0 */
+ { 0x6C, 0, 0 } /* FSYNR1 */
};
struct remote_iommu_petersons_spinlock kgsl_iommu_sync_lock_vars;
@@ -100,8 +111,172 @@
return NULL;
}
+/* These functions help find the nearest allocated memory entries on either
+ * side of a faulting address. If we know the nearby allocated memory, we
+ * can make a better determination of what we think should have been located
+ * in the faulting region.
+ */
+
+/*
+ * A local structure to make it easy to store the interesting bits for the
+ * memory entries on either side of the faulting address
+ */
+
+struct _mem_entry {
+ unsigned int gpuaddr;
+ unsigned int size;
+ unsigned int flags;
+ unsigned int priv;
+ pid_t pid;
+};
+
+/*
+ * Find the closest allocated memory block with a smaller GPU address than
+ * the given address
+ */
+
+static void _prev_entry(struct kgsl_process_private *priv,
+ unsigned int faultaddr, struct _mem_entry *ret)
+{
+ struct rb_node *node;
+ struct kgsl_mem_entry *entry;
+
+ for (node = rb_first(&priv->mem_rb); node; ) {
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+
+ if (entry->memdesc.gpuaddr > faultaddr)
+ break;
+
+ /*
+ * If this is closer to the faulting address, then copy
+ * the entry
+ */
+
+ if (entry->memdesc.gpuaddr > ret->gpuaddr) {
+ ret->gpuaddr = entry->memdesc.gpuaddr;
+ ret->size = entry->memdesc.size;
+ ret->flags = entry->memdesc.flags;
+ ret->priv = entry->memdesc.priv;
+ ret->pid = priv->pid;
+ }
+
+ node = rb_next(&entry->node);
+ }
+}
+
+/*
+ * Find the closest allocated memory block with a greater starting GPU
+ * address than the given address
+ */
+
+static void _next_entry(struct kgsl_process_private *priv,
+ unsigned int faultaddr, struct _mem_entry *ret)
+{
+ struct rb_node *node;
+ struct kgsl_mem_entry *entry;
+
+ for (node = rb_last(&priv->mem_rb); node; ) {
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+
+ if (entry->memdesc.gpuaddr < faultaddr)
+ break;
+
+ /*
+ * If this is closer to the faulting address, then copy
+ * the entry
+ */
+
+ if (entry->memdesc.gpuaddr < ret->gpuaddr) {
+ ret->gpuaddr = entry->memdesc.gpuaddr;
+ ret->size = entry->memdesc.size;
+ ret->flags = entry->memdesc.flags;
+ ret->priv = entry->memdesc.priv;
+ ret->pid = priv->pid;
+ }
+
+ node = rb_prev(&entry->node);
+ }
+}
+
+static void _find_mem_entries(struct kgsl_mmu *mmu, unsigned int faultaddr,
+ unsigned int ptbase, struct _mem_entry *preventry,
+ struct _mem_entry *nextentry)
+{
+ struct kgsl_process_private *private;
+ int id = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase);
+
+ memset(preventry, 0, sizeof(*preventry));
+ memset(nextentry, 0, sizeof(*nextentry));
+
+ /* Set the maximum possible address as an initial value */
+ nextentry->gpuaddr = 0xFFFFFFFF;
+
+ mutex_lock(&kgsl_driver.process_mutex);
+
+ list_for_each_entry(private, &kgsl_driver.process_list, list) {
+
+ if (private->pagetable->name != id)
+ continue;
+
+ spin_lock(&private->mem_lock);
+ _prev_entry(private, faultaddr, preventry);
+ _next_entry(private, faultaddr, nextentry);
+ spin_unlock(&private->mem_lock);
+ }
+
+ mutex_unlock(&kgsl_driver.process_mutex);
+}
+
+static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry)
+{
+ char name[32];
+ memset(name, 0, sizeof(name));
+
+ kgsl_get_memory_usage(name, sizeof(name) - 1, entry->flags);
+
+ KGSL_LOG_DUMP(device,
+ "[%8.8X - %8.8X] %s (pid = %d) (%s)\n",
+ entry->gpuaddr,
+ entry->gpuaddr + entry->size,
+ entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "",
+ entry->pid, name);
+}
+
+static void _check_if_freed(struct kgsl_iommu_device *iommu_dev,
+ unsigned long addr, unsigned int pid)
+{
+ void *base = kgsl_driver.memfree_hist.base_hist_rb;
+ struct kgsl_memfree_hist_elem *wptr;
+ struct kgsl_memfree_hist_elem *p;
+
+ mutex_lock(&kgsl_driver.memfree_hist_mutex);
+ wptr = kgsl_driver.memfree_hist.wptr;
+ p = wptr;
+ for (;;) {
+ if (p->size && p->pid == pid)
+ if (addr >= p->gpuaddr &&
+ addr < (p->gpuaddr + p->size)) {
+
+ KGSL_LOG_DUMP(iommu_dev->kgsldev,
+ "---- premature free ----\n");
+ KGSL_LOG_DUMP(iommu_dev->kgsldev,
+ "[%8.8X-%8.8X] was already freed by pid %d\n",
+ p->gpuaddr,
+ p->gpuaddr + p->size,
+ p->pid);
+ }
+ p++;
+ if ((void *)p >= base + kgsl_driver.memfree_hist.size)
+ p = (struct kgsl_memfree_hist_elem *) base;
+
+ if (p == kgsl_driver.memfree_hist.wptr)
+ break;
+ }
+ mutex_unlock(&kgsl_driver.memfree_hist_mutex);
+}
+
static int kgsl_iommu_fault_handler(struct iommu_domain *domain,
- struct device *dev, unsigned long addr, int flags)
+ struct device *dev, unsigned long addr, int flags, void *token)
{
int ret = 0;
struct kgsl_mmu *mmu;
@@ -109,9 +284,17 @@
struct kgsl_iommu_unit *iommu_unit;
struct kgsl_iommu_device *iommu_dev;
unsigned int ptbase, fsr;
+ unsigned int pid;
+ struct _mem_entry prev, next;
+ unsigned int fsynr0, fsynr1;
+ int write;
struct kgsl_device *device;
struct adreno_device *adreno_dev;
unsigned int no_page_fault_log = 0;
+ unsigned int curr_context_id = 0;
+ unsigned int curr_global_ts = 0;
+ static struct adreno_context *curr_context;
+ static struct kgsl_context *context;
ret = get_iommu_unit(dev, &mmu, &iommu_unit);
if (ret)
@@ -131,6 +314,25 @@
fsr = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
iommu_dev->ctx_id, FSR);
+ fsynr0 = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
+ iommu_dev->ctx_id, FSYNR0);
+ fsynr1 = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
+ iommu_dev->ctx_id, FSYNR1);
+
+ if (msm_soc_version_supports_iommu_v0())
+ write = ((fsynr1 & (KGSL_IOMMU_FSYNR1_AWRITE_MASK <<
+ KGSL_IOMMU_FSYNR1_AWRITE_SHIFT)) ? 1 : 0);
+ else
+ write = ((fsynr0 & (KGSL_IOMMU_V1_FSYNR0_WNR_MASK <<
+ KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT)) ? 1 : 0);
+
+ pid = kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase);
+ KGSL_MEM_CRIT(iommu_dev->kgsldev,
+ "GPU PAGE FAULT: addr = %lX pid = %d\n", addr, pid);
+ KGSL_MEM_CRIT(iommu_dev->kgsldev,
+ "context = %d FSR = %X FSYNR0 = %X FSYNR1 = %X(%s fault)\n",
+ iommu_dev->ctx_id, fsr, fsynr0, fsynr1,
+ write ? "write" : "read");
if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr);
@@ -141,13 +343,49 @@
addr, kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase));
KGSL_MEM_CRIT(iommu_dev->kgsldev, "context = %d FSR = %X\n",
iommu_dev->ctx_id, fsr);
+
+ _check_if_freed(iommu_dev, addr, pid);
+
+ KGSL_LOG_DUMP(iommu_dev->kgsldev, "---- nearby memory ----\n");
+
+ _find_mem_entries(mmu, addr, ptbase, &prev, &next);
+
+ if (prev.gpuaddr)
+ _print_entry(iommu_dev->kgsldev, &prev);
+ else
+ KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n");
+
+ KGSL_LOG_DUMP(iommu_dev->kgsldev, " <- fault @ %8.8lX\n", addr);
+
+ if (next.gpuaddr != 0xFFFFFFFF)
+ _print_entry(iommu_dev->kgsldev, &next);
+ else
+ KGSL_LOG_DUMP(iommu_dev->kgsldev, "*EMPTY*\n");
+
}
mmu->fault = 1;
iommu_dev->fault = 1;
+ kgsl_sharedmem_readl(&device->memstore, &curr_context_id,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
+ context = idr_find(&device->context_idr, curr_context_id);
+ if (context != NULL)
+ curr_context = context->devctxt;
+
+ kgsl_sharedmem_readl(&device->memstore, &curr_global_ts,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, eoptimestamp));
+
+ /*
+ * Store pagefault's timestamp in adreno context,
+ * this information will be used in GFT
+ */
+ curr_context->pagefault = 1;
+ curr_context->pagefault_ts = curr_global_ts;
+
trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
- kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), 0);
+ kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
+ write ? "write" : "read");
/*
* We do not want the h/w to resume fetching data from an iommu unit
@@ -367,9 +605,9 @@
*
* Return - void
*/
-static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt)
+static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pt)
{
- struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
+ struct kgsl_iommu_pt *iommu_pt = pt->priv;
if (iommu_pt->domain)
iommu_domain_free(iommu_pt->domain);
kfree(iommu_pt);
@@ -384,28 +622,39 @@
*/
void *kgsl_iommu_create_pagetable(void)
{
+ int domain_num;
struct kgsl_iommu_pt *iommu_pt;
+ struct msm_iova_partition kgsl_partition = {
+ .start = 0,
+ .size = 0xFFFFFFFF,
+ };
+ struct msm_iova_layout kgsl_layout = {
+ .partitions = &kgsl_partition,
+ .npartitions = 1,
+ .client_name = "kgsl",
+ .domain_flags = 0,
+ };
+
iommu_pt = kzalloc(sizeof(struct kgsl_iommu_pt), GFP_KERNEL);
if (!iommu_pt) {
KGSL_CORE_ERR("kzalloc(%d) failed\n",
sizeof(struct kgsl_iommu_pt));
return NULL;
}
- /* L2 redirect is not stable on IOMMU v2 */
- if (msm_soc_version_supports_iommu_v1())
- iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
- MSM_IOMMU_DOMAIN_PT_CACHEABLE);
- else
- iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
- 0);
- if (!iommu_pt->domain) {
+ /* L2 redirect is not stable on IOMMU v1 */
+ if (msm_soc_version_supports_iommu_v0())
+ kgsl_layout.domain_flags = MSM_IOMMU_DOMAIN_PT_CACHEABLE;
+
+ domain_num = msm_register_domain(&kgsl_layout);
+ if (domain_num >= 0) {
+ iommu_pt->domain = msm_get_iommu_domain(domain_num);
+ iommu_set_fault_handler(iommu_pt->domain,
+ kgsl_iommu_fault_handler, NULL);
+ } else {
KGSL_CORE_ERR("Failed to create iommu domain\n");
kfree(iommu_pt);
return NULL;
- } else {
- iommu_set_fault_handler(iommu_pt->domain,
- kgsl_iommu_fault_handler);
}
return iommu_pt;
@@ -523,17 +772,23 @@
{
struct kgsl_iommu *iommu = mmu->priv;
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[unit_id];
- int i;
+ int i, j;
+ int found_ctx;
- if (data->iommu_ctx_count > KGSL_IOMMU_MAX_DEVS_PER_UNIT) {
- KGSL_CORE_ERR("Too many iommu devices defined for an "
- "IOMMU unit\n");
- return -EINVAL;
- }
-
- for (i = 0; i < data->iommu_ctx_count; i++) {
- if (!data->iommu_ctxs[i].iommu_ctx_name)
- continue;
+ for (j = 0; j < KGSL_IOMMU_MAX_DEVS_PER_UNIT; j++) {
+ found_ctx = 0;
+ for (i = 0; i < data->iommu_ctx_count; i++) {
+ if (j == data->iommu_ctxs[i].ctx_id) {
+ found_ctx = 1;
+ break;
+ }
+ }
+ if (!found_ctx)
+ break;
+ if (!data->iommu_ctxs[i].iommu_ctx_name) {
+ KGSL_CORE_ERR("Context name invalid\n");
+ return -EINVAL;
+ }
iommu_unit->dev[iommu_unit->dev_count].dev =
msm_iommu_get_ctx(data->iommu_ctxs[i].iommu_ctx_name);
@@ -542,12 +797,6 @@
"device %s\n", data->iommu_ctxs[i].iommu_ctx_name);
return -EINVAL;
}
- if (KGSL_IOMMU_CONTEXT_USER != data->iommu_ctxs[i].ctx_id &&
- KGSL_IOMMU_CONTEXT_PRIV != data->iommu_ctxs[i].ctx_id) {
- KGSL_CORE_ERR("Invalid context ID defined: %d\n",
- data->iommu_ctxs[i].ctx_id);
- return -EINVAL;
- }
iommu_unit->dev[iommu_unit->dev_count].ctx_id =
data->iommu_ctxs[i].ctx_id;
iommu_unit->dev[iommu_unit->dev_count].kgsldev = mmu->device;
@@ -559,6 +808,51 @@
iommu_unit->dev_count++;
}
+ if (!j) {
+ KGSL_CORE_ERR("No ctxts initialized, user ctxt absent\n ");
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * kgsl_iommu_start_sync_lock - Initialize some variables during MMU start up
+ * for GPU CPU synchronization
+ * @mmu - Pointer to mmu device
+ *
+ * Return - 0 on success else error code
+ */
+static int kgsl_iommu_start_sync_lock(struct kgsl_mmu *mmu)
+{
+ struct kgsl_iommu *iommu = mmu->priv;
+ uint32_t lock_gpu_addr = 0;
+
+ if (KGSL_DEVICE_3D0 != mmu->device->id ||
+ !msm_soc_version_supports_iommu_v0() ||
+ !kgsl_mmu_is_perprocess(mmu) ||
+ iommu->sync_lock_vars)
+ return 0;
+
+ if (!(mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC)) {
+ KGSL_DRV_ERR(mmu->device,
+ "The GPU microcode does not support IOMMUv1 sync opcodes\n");
+ return -ENXIO;
+ }
+ /* Store Lock variables GPU address */
+ lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr +
+ iommu->sync_lock_offset);
+
+ kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock,
+ flag[PROC_APPS])));
+ kgsl_iommu_sync_lock_vars.flag[PROC_GPU] = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock,
+ flag[PROC_GPU])));
+ kgsl_iommu_sync_lock_vars.turn = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock, turn)));
+
+ iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
return 0;
}
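
kgsl_iommu_start_sync_lock() only publishes the GPU-visible addresses of the flag[] and turn words; the mutual exclusion itself is Peterson's algorithm run between PROC_APPS (the CPU) and PROC_GPU. A sketch of that algorithm with illustrative names — the real lock lives in the shared memory described by remote_iommu_petersons_spinlock, and a production version needs memory barriers around each access:

struct petersons_lock {
	volatile unsigned int flag[2];  /* "I want the lock", per processor */
	volatile unsigned int turn;     /* whose turn it is to yield */
};

static void peterson_lock(struct petersons_lock *l, int me, int other)
{
	l->flag[me] = 1;
	l->turn = other;
	/* spin while the peer wants the lock and it is the peer's turn */
	while (l->flag[other] && l->turn == other)
		;
}

static void peterson_unlock(struct petersons_lock *l, int me)
{
	l->flag[me] = 0;
}
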
@@ -571,21 +865,30 @@
*/
static int kgsl_iommu_init_sync_lock(struct kgsl_mmu *mmu)
{
- struct kgsl_iommu *iommu = mmu->device->mmu.priv;
+ struct kgsl_iommu *iommu = mmu->priv;
int status = 0;
- struct kgsl_pagetable *pagetable = NULL;
- uint32_t lock_gpu_addr = 0;
uint32_t lock_phy_addr = 0;
uint32_t page_offset = 0;
- iommu->sync_lock_initialized = 0;
+ if (!msm_soc_version_supports_iommu_v0() ||
+ !kgsl_mmu_is_perprocess(mmu))
+ return status;
- if (!(mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC)) {
- KGSL_DRV_ERR(mmu->device,
- "The GPU microcode does not support IOMMUv1 sync opcodes\n");
- return -ENXIO;
+ /*
+ * For 2D devices a CPU-side sync lock is required. For the 3D
+ * device, since we only have a single 3D core and we always
+ * ensure that the 3D core is idle while writing to IOMMU
+ * registers from the CPU, this lock is not required.
+ */
+ if (KGSL_DEVICE_2D0 == mmu->device->id ||
+ KGSL_DEVICE_2D1 == mmu->device->id) {
+ return status;
}
+ /* Return if already initialized */
+ if (iommu->sync_lock_initialized)
+ return status;
+
/* Get the physical address of the Lock variables */
lock_phy_addr = (msm_iommu_lock_initialize()
- MSM_SHARED_RAM_BASE + msm_shared_ram_phys);
@@ -600,6 +903,7 @@
page_offset = (lock_phy_addr & (PAGE_SIZE - 1));
lock_phy_addr = (lock_phy_addr & ~(PAGE_SIZE - 1));
iommu->sync_lock_desc.physaddr = (unsigned int)lock_phy_addr;
+ iommu->sync_lock_offset = page_offset;
iommu->sync_lock_desc.size =
PAGE_ALIGN(sizeof(kgsl_iommu_sync_lock_vars));
@@ -610,35 +914,6 @@
if (status)
return status;
- /* Map Lock variables to GPU pagetable */
- iommu->sync_lock_desc.priv |= KGSL_MEMDESC_GLOBAL;
-
- pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
- mmu->defaultpagetable;
-
- status = kgsl_mmu_map(pagetable, &iommu->sync_lock_desc,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-
- if (status) {
- kgsl_mmu_unmap(pagetable, &iommu->sync_lock_desc);
- iommu->sync_lock_desc.priv &= ~KGSL_MEMDESC_GLOBAL;
- return status;
- }
-
- /* Store Lock variables GPU address */
- lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr + page_offset);
-
- kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
- (offsetof(struct remote_iommu_petersons_spinlock,
- flag[PROC_APPS])));
- kgsl_iommu_sync_lock_vars.flag[PROC_GPU] = (lock_gpu_addr +
- (offsetof(struct remote_iommu_petersons_spinlock,
- flag[PROC_GPU])));
- kgsl_iommu_sync_lock_vars.turn = (lock_gpu_addr +
- (offsetof(struct remote_iommu_petersons_spinlock, turn)));
-
- iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
-
/* Flag Sync Lock is Initialized */
iommu->sync_lock_initialized = 1;
@@ -897,6 +1172,75 @@
}
}
+/*
+ * kgsl_iommu_setup_regs - map iommu registers into a pagetable
+ * @mmu: Pointer to mmu structure
+ * @pt: the pagetable
+ *
+ * To do pagetable switches from the GPU command stream, the IOMMU
+ * registers need to be mapped into the GPU's pagetable. This function
+ * is used differently on different targets. On 8960, the registers
+ * are mapped into every pagetable during kgsl_setup_pt(). On
+ * all other targets, the registers are mapped only into the second
+ * context bank.
+ *
+ * Return - 0 on success else error code
+ */
+static int kgsl_iommu_setup_regs(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt)
+{
+ int status;
+ int i = 0;
+ struct kgsl_iommu *iommu = mmu->priv;
+
+ if (!msm_soc_version_supports_iommu_v0())
+ return 0;
+
+ for (i = 0; i < iommu->unit_count; i++) {
+ status = kgsl_mmu_map_global(pt,
+ &(iommu->iommu_units[i].reg_map));
+ if (status)
+ goto err;
+ }
+
+ /* Map Lock variables to GPU pagetable */
+ if (iommu->sync_lock_initialized) {
+ status = kgsl_mmu_map_global(pt, &iommu->sync_lock_desc);
+ if (status)
+ goto err;
+ }
+
+ return 0;
+err:
+ for (i--; i >= 0; i--)
+ kgsl_mmu_unmap(pt,
+ &(iommu->iommu_units[i].reg_map));
+
+ return status;
+}
+
+/*
+ * kgsl_iommu_cleanup_regs - unmap iommu registers from a pagetable
+ * @mmu: Pointer to mmu structure
+ * @pt: the pagetable
+ *
+ * Removes mappings created by kgsl_iommu_setup_regs().
+ *
+ * Return - void
+ */
+static void kgsl_iommu_cleanup_regs(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt)
+{
+ struct kgsl_iommu *iommu = mmu->priv;
+ int i;
+ for (i = 0; i < iommu->unit_count; i++)
+ kgsl_mmu_unmap(pt, &(iommu->iommu_units[i].reg_map));
+
+ if (iommu->sync_lock_desc.gpuaddr)
+ kgsl_mmu_unmap(pt, &iommu->sync_lock_desc);
+}
+
+
static int kgsl_iommu_init(struct kgsl_mmu *mmu)
{
/*
@@ -921,16 +1265,45 @@
status = kgsl_set_register_map(mmu);
if (status)
goto done;
+ status = kgsl_iommu_init_sync_lock(mmu);
+ if (status)
+ goto done;
- iommu->iommu_reg_list = kgsl_iommuv1_reg;
- iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
+ /* We presently do not support per-process for IOMMU-v1 */
+ mmu->pt_per_process = KGSL_MMU_USE_PER_PROCESS_PT &&
+ msm_soc_version_supports_iommu_v0();
- if (msm_soc_version_supports_iommu_v1()) {
+ /*
+ * For IOMMU per-process pagetables, the allocatable range
+ * and the kernel global range must both be outside
+ * the userspace address range. There is a 1Mb gap
+ * between these address ranges to make overrun
+ * detection easier.
+ * For the shared pagetable case use 2GB, because mirroring
+ * the CPU address space is not possible and we're better
+ * off with the extra room.
+ */
+ if (mmu->pt_per_process) {
+ mmu->pt_base = PAGE_OFFSET;
+ mmu->pt_size = KGSL_IOMMU_GLOBAL_MEM_BASE
+ - kgsl_mmu_get_base_addr(mmu) - SZ_1M;
+ mmu->use_cpu_map = true;
+ } else {
+ mmu->pt_base = KGSL_PAGETABLE_BASE;
+ mmu->pt_size = SZ_2G;
+ mmu->use_cpu_map = false;
+ }
+
+
+ iommu->iommu_reg_list = kgsl_iommuv0_reg;
+ iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V0;
+
+ if (msm_soc_version_supports_iommu_v0()) {
+ iommu->iommu_reg_list = kgsl_iommuv0_reg;
+ iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V0;
+ } else {
iommu->iommu_reg_list = kgsl_iommuv1_reg;
iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
- } else {
- iommu->iommu_reg_list = kgsl_iommuv2_reg;
- iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V2;
}
/* A nop is required in an indirect buffer when switching
@@ -939,6 +1312,15 @@
KGSL_IOMMU_SETSTATE_NOP_OFFSET,
cp_nop_packet(1));
+ if (cpu_is_msm8960()) {
+ /*
+ * 8960 doesn't have a second context bank, so the IOMMU
+ * registers must be mapped into every pagetable.
+ */
+ iommu_ops.mmu_setup_pt = kgsl_iommu_setup_regs;
+ iommu_ops.mmu_cleanup_pt = kgsl_iommu_cleanup_regs;
+ }
+
dev_info(mmu->device->dev, "|%s| MMU type set for device is IOMMU\n",
__func__);
done:
@@ -961,51 +1343,31 @@
static int kgsl_iommu_setup_defaultpagetable(struct kgsl_mmu *mmu)
{
int status = 0;
- int i = 0;
- struct kgsl_iommu *iommu = mmu->priv;
- struct kgsl_pagetable *pagetable = NULL;
/* If chip is not 8960 then we use the 2nd context bank for pagetable
* switching on the 3D side for which a separate table is allocated */
- if (!cpu_is_msm8960() && msm_soc_version_supports_iommu_v1()) {
+ if (!cpu_is_msm8960() && msm_soc_version_supports_iommu_v0()) {
mmu->priv_bank_table =
- kgsl_mmu_getpagetable(KGSL_MMU_PRIV_BANK_TABLE_NAME);
+ kgsl_mmu_getpagetable(mmu,
+ KGSL_MMU_PRIV_BANK_TABLE_NAME);
if (mmu->priv_bank_table == NULL) {
status = -ENOMEM;
goto err;
}
+ status = kgsl_iommu_setup_regs(mmu, mmu->priv_bank_table);
+ if (status)
+ goto err;
}
- mmu->defaultpagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
+ mmu->defaultpagetable = kgsl_mmu_getpagetable(mmu, KGSL_MMU_GLOBAL_PT);
/* Return error if the default pagetable doesn't exist */
if (mmu->defaultpagetable == NULL) {
status = -ENOMEM;
goto err;
}
- pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
- mmu->defaultpagetable;
- /* Map the IOMMU regsiters to only defaultpagetable */
- if (msm_soc_version_supports_iommu_v1()) {
- for (i = 0; i < iommu->unit_count; i++) {
- iommu->iommu_units[i].reg_map.priv |=
- KGSL_MEMDESC_GLOBAL;
- status = kgsl_mmu_map(pagetable,
- &(iommu->iommu_units[i].reg_map),
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
- if (status) {
- iommu->iommu_units[i].reg_map.priv &=
- ~KGSL_MEMDESC_GLOBAL;
- goto err;
- }
- }
- }
return status;
err:
- for (i--; i >= 0; i--) {
- kgsl_mmu_unmap(pagetable,
- &(iommu->iommu_units[i].reg_map));
- iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMDESC_GLOBAL;
- }
if (mmu->priv_bank_table) {
+ kgsl_iommu_cleanup_regs(mmu, mmu->priv_bank_table);
kgsl_mmu_putpagetable(mmu->priv_bank_table);
mmu->priv_bank_table = NULL;
}
@@ -1016,9 +1378,113 @@
return status;
}
-static int kgsl_iommu_start(struct kgsl_mmu *mmu)
+/*
+ * kgsl_iommu_lock_rb_in_tlb - Allocates tlb entries and locks the
+ * virtual to physical address translation of ringbuffer for 3D
+ * device into tlb.
+ * @mmu - Pointer to mmu structure
+ *
+ * Return - void
+ */
+static void kgsl_iommu_lock_rb_in_tlb(struct kgsl_mmu *mmu)
{
struct kgsl_device *device = mmu->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct adreno_ringbuffer *rb;
+ struct kgsl_iommu *iommu = mmu->priv;
+ unsigned int num_tlb_entries;
+ unsigned int tlblkcr = 0;
+ unsigned int v2pxx = 0;
+ unsigned int vaddr = 0;
+ int i, j, k, l;
+
+ if (!iommu->sync_lock_initialized)
+ return;
+
+ rb = &adreno_dev->ringbuffer;
+ num_tlb_entries = rb->buffer_desc.size / PAGE_SIZE;
+
+ for (i = 0; i < iommu->unit_count; i++) {
+ struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ tlblkcr = 0;
+ if (cpu_is_msm8960())
+ tlblkcr |= ((num_tlb_entries &
+ KGSL_IOMMU_TLBLKCR_FLOOR_MASK) <<
+ KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT);
+ else
+ tlblkcr |= (((num_tlb_entries *
+ iommu_unit->dev_count) &
+ KGSL_IOMMU_TLBLKCR_FLOOR_MASK) <<
+ KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT);
+ /* Do not invalidate locked entries on tlbiall flush */
+ tlblkcr |= ((1 & KGSL_IOMMU_TLBLKCR_TLBIALLCFG_MASK)
+ << KGSL_IOMMU_TLBLKCR_TLBIALLCFG_SHIFT);
+ tlblkcr |= ((1 & KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_MASK)
+ << KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_SHIFT);
+ tlblkcr |= ((1 & KGSL_IOMMU_TLBLKCR_TLBIVAACFG_MASK)
+ << KGSL_IOMMU_TLBLKCR_TLBIVAACFG_SHIFT);
+ /* Enable tlb locking */
+ tlblkcr |= ((1 & KGSL_IOMMU_TLBLKCR_LKE_MASK)
+ << KGSL_IOMMU_TLBLKCR_LKE_SHIFT);
+ KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
+ iommu_unit->dev[j].ctx_id,
+ TLBLKCR, tlblkcr);
+ }
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ /* skip locking entries for private bank on 8960 */
+ if (cpu_is_msm8960() && KGSL_IOMMU_CONTEXT_PRIV == j)
+ continue;
+ /* Lock the ringbuffer virtual address into tlb */
+ vaddr = rb->buffer_desc.gpuaddr;
+ for (k = 0; k < num_tlb_entries; k++) {
+ v2pxx = 0;
+ v2pxx |= (((k + j * num_tlb_entries) &
+ KGSL_IOMMU_V2PXX_INDEX_MASK)
+ << KGSL_IOMMU_V2PXX_INDEX_SHIFT);
+ v2pxx |= vaddr & (KGSL_IOMMU_V2PXX_VA_MASK <<
+ KGSL_IOMMU_V2PXX_VA_SHIFT);
+
+ KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
+ iommu_unit->dev[j].ctx_id,
+ V2PUR, v2pxx);
+ vaddr += PAGE_SIZE;
+ for (l = 0; l < iommu_unit->dev_count; l++) {
+ tlblkcr = KGSL_IOMMU_GET_CTX_REG(iommu,
+ iommu_unit,
+ iommu_unit->dev[l].ctx_id,
+ TLBLKCR);
+ mb();
+ tlblkcr &=
+ ~(KGSL_IOMMU_TLBLKCR_VICTIM_MASK
+ << KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT);
+ tlblkcr |= (((k + 1 +
+ (j * num_tlb_entries)) &
+ KGSL_IOMMU_TLBLKCR_VICTIM_MASK) <<
+ KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT);
+ KGSL_IOMMU_SET_CTX_REG(iommu,
+ iommu_unit,
+ iommu_unit->dev[l].ctx_id,
+ TLBLKCR, tlblkcr);
+ }
+ }
+ }
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ tlblkcr = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
+ iommu_unit->dev[j].ctx_id,
+ TLBLKCR);
+ mb();
+ /* Disable tlb locking */
+ tlblkcr &= ~(KGSL_IOMMU_TLBLKCR_LKE_MASK
+ << KGSL_IOMMU_TLBLKCR_LKE_SHIFT);
+ KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit,
+ iommu_unit->dev[j].ctx_id, TLBLKCR, tlblkcr);
+ }
+ }
+}
+
+static int kgsl_iommu_start(struct kgsl_mmu *mmu)
+{
int status;
struct kgsl_iommu *iommu = mmu->priv;
int i, j;
@@ -1030,23 +1496,21 @@
status = kgsl_iommu_setup_defaultpagetable(mmu);
if (status)
return -ENOMEM;
-
- /* Initialize the sync lock between GPU and CPU */
- if (msm_soc_version_supports_iommu_v1() &&
- (device->id == KGSL_DEVICE_3D0))
- kgsl_iommu_init_sync_lock(mmu);
}
+ status = kgsl_iommu_start_sync_lock(mmu);
+ if (status)
+ return status;
/* We use the GPU MMU to control access to IOMMU registers on 8960 with
* a225, hence we still keep the MMU active on 8960 */
- if (cpu_is_msm8960()) {
+ if (cpu_is_msm8960() && KGSL_DEVICE_3D0 == mmu->device->id) {
struct kgsl_mh *mh = &(mmu->device->mh);
+ BUG_ON(iommu->iommu_units[0].reg_map.gpuaddr != 0 &&
+ mh->mpu_base > iommu->iommu_units[0].reg_map.gpuaddr);
kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000001);
+
kgsl_regwrite(mmu->device, MH_MMU_MPU_END,
- mh->mpu_base +
- iommu->iommu_units[0].reg_map.gpuaddr);
- } else {
- kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
+ mh->mpu_base + mh->mpu_range);
}
mmu->hwpagetable = mmu->defaultpagetable;
@@ -1071,6 +1535,7 @@
* changing pagetables we can use this lsb value of the pagetable w/o
* having to read it again
*/
+ msm_iommu_lock();
for (i = 0; i < iommu->unit_count; i++) {
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
for (j = 0; j < iommu_unit->dev_count; j++) {
@@ -1081,6 +1546,13 @@
TTBR0));
}
}
+ kgsl_iommu_lock_rb_in_tlb(mmu);
+ msm_iommu_unlock();
+
+ /* For complete CFF */
+ kgsl_cffdump_setmem(mmu->setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET,
+ cp_nop_packet(1), sizeof(unsigned int));
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
mmu->flags |= KGSL_FLAGS_STARTED;
@@ -1094,13 +1566,13 @@
}
static int
-kgsl_iommu_unmap(void *mmu_specific_pt,
+kgsl_iommu_unmap(struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int *tlb_flags)
{
int ret;
unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
- struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
+ struct kgsl_iommu_pt *iommu_pt = pt->priv;
/* All GPU addresses as assigned are page aligned, but some
functions perturb the gpuaddr with an offset, so apply the
@@ -1117,43 +1589,58 @@
"with err: %d\n", iommu_pt->domain, gpuaddr,
range, ret);
-#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
/*
* Flushing only required if per process pagetables are used. With
* global case, flushing will happen inside iommu_map function
*/
- if (!ret && msm_soc_version_supports_iommu_v1())
+ if (!ret && kgsl_mmu_is_perprocess(pt->mmu))
*tlb_flags = UINT_MAX;
-#endif
return 0;
}
static int
-kgsl_iommu_map(void *mmu_specific_pt,
+kgsl_iommu_map(struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int protflags,
unsigned int *tlb_flags)
{
int ret;
unsigned int iommu_virt_addr;
- struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
+ struct kgsl_iommu_pt *iommu_pt = pt->priv;
int size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
BUG_ON(NULL == iommu_pt);
+ /* if there's a guard page, we'll map it read only below */
+ if ((protflags & IOMMU_WRITE) && kgsl_memdesc_has_guard_page(memdesc))
+ size -= PAGE_SIZE;
iommu_virt_addr = memdesc->gpuaddr;
ret = iommu_map_range(iommu_pt->domain, iommu_virt_addr, memdesc->sg,
- size, (IOMMU_READ | IOMMU_WRITE));
+ size, protflags);
if (ret) {
- KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %d) "
- "failed with err: %d\n", iommu_pt->domain,
- iommu_virt_addr, memdesc->sg, size,
- (IOMMU_READ | IOMMU_WRITE), ret);
+ KGSL_CORE_ERR("iommu_map_range(%p, %x, %p, %d, %x) err: %d\n",
+ iommu_pt->domain, iommu_virt_addr, memdesc->sg, size,
+ protflags, ret);
return ret;
}
+ if ((protflags & IOMMU_WRITE) && kgsl_memdesc_has_guard_page(memdesc)) {
+ struct scatterlist *sg = &memdesc->sg[memdesc->sglen - 1];
+ ret = iommu_map(iommu_pt->domain, iommu_virt_addr + size,
+ kgsl_get_sg_pa(sg), PAGE_SIZE,
+ protflags & ~IOMMU_WRITE);
+ if (ret) {
+ KGSL_CORE_ERR("iommu_map(%p, %x, %x, %x) err: %d\n",
+ iommu_pt->domain, iommu_virt_addr + size,
+ kgsl_get_sg_pa(sg), protflags & ~IOMMU_WRITE,
+ ret);
+ /* cleanup the partial mapping */
+ iommu_unmap_range(iommu_pt->domain, iommu_virt_addr,
+ size);
+ }
+ }
return ret;
}
@@ -1166,7 +1653,6 @@
*
* call this with the global lock held
*/
-
if (mmu->flags & KGSL_FLAGS_STARTED) {
/* detach iommu attachment */
kgsl_detach_pagetable_iommu_domain(mmu);
@@ -1181,10 +1667,12 @@
for (j = 0; j < iommu_unit->dev_count; j++) {
if (iommu_unit->dev[j].fault) {
kgsl_iommu_enable_clk(mmu, j);
+ msm_iommu_lock();
KGSL_IOMMU_SET_CTX_REG(iommu,
iommu_unit,
iommu_unit->dev[j].ctx_id,
RESUME, 1);
+ msm_iommu_unlock();
iommu_unit->dev[j].fault = 0;
}
}
@@ -1202,22 +1690,28 @@
{
struct kgsl_iommu *iommu = mmu->priv;
int i;
- for (i = 0; i < iommu->unit_count; i++) {
- struct kgsl_pagetable *pagetable = (mmu->priv_bank_table ?
- mmu->priv_bank_table : mmu->defaultpagetable);
- if (iommu->iommu_units[i].reg_map.gpuaddr)
- kgsl_mmu_unmap(pagetable,
- &(iommu->iommu_units[i].reg_map));
- if (iommu->iommu_units[i].reg_map.hostptr)
- iounmap(iommu->iommu_units[i].reg_map.hostptr);
- kgsl_sg_free(iommu->iommu_units[i].reg_map.sg,
- iommu->iommu_units[i].reg_map.sglen);
+
+ if (mmu->priv_bank_table != NULL) {
+ kgsl_iommu_cleanup_regs(mmu, mmu->priv_bank_table);
+ kgsl_mmu_putpagetable(mmu->priv_bank_table);
}
- if (mmu->priv_bank_table)
- kgsl_mmu_putpagetable(mmu->priv_bank_table);
- if (mmu->defaultpagetable)
+ if (mmu->defaultpagetable != NULL)
kgsl_mmu_putpagetable(mmu->defaultpagetable);
+
+ for (i = 0; i < iommu->unit_count; i++) {
+ struct kgsl_memdesc *reg_map = &iommu->iommu_units[i].reg_map;
+
+ if (reg_map->hostptr)
+ iounmap(reg_map->hostptr);
+ kgsl_sg_free(reg_map->sg, reg_map->sglen);
+ reg_map->priv &= ~KGSL_MEMDESC_GLOBAL;
+ }
+ /* clear IOMMU GPU CPU sync structures */
+ kgsl_sg_free(iommu->sync_lock_desc.sg, iommu->sync_lock_desc.sglen);
+ memset(&iommu->sync_lock_desc, 0, sizeof(iommu->sync_lock_desc));
+ iommu->sync_lock_vars = NULL;
+
kfree(iommu);
return 0;
@@ -1273,12 +1767,15 @@
pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
- //if (msm_soc_version_supports_iommu_v1())
+ /* For v0 SMMU the GPU needs to be idle for TLB invalidate as well */
+ if (msm_soc_version_supports_iommu_v0())
+ kgsl_idle(mmu->device);
+
/* Acquire GPU-CPU sync Lock here */
msm_iommu_lock();
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
- if (!msm_soc_version_supports_iommu_v1())
+ if (!msm_soc_version_supports_iommu_v0())
kgsl_idle(mmu->device);
for (i = 0; i < iommu->unit_count; i++) {
/* get the lsb value which should not change when
@@ -1357,6 +1854,9 @@
.mmu_get_num_iommu_units = kgsl_iommu_get_num_iommu_units,
.mmu_pt_equal = kgsl_iommu_pt_equal,
.mmu_get_pt_base_addr = kgsl_iommu_get_pt_base_addr,
+ /* These callbacks will be set on some chipsets */
+ .mmu_setup_pt = NULL,
+ .mmu_cleanup_pt = NULL,
.mmu_sync_lock = kgsl_iommu_sync_lock,
.mmu_sync_unlock = kgsl_iommu_sync_unlock,
};
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index 4507700..c09bc4b 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -15,10 +15,37 @@
#include <mach/iommu.h>
-#define KGSL_IOMMU_CTX_OFFSET_V1 0
-#define KGSL_IOMMU_CTX_OFFSET_V2 0x8000
+#define KGSL_IOMMU_CTX_OFFSET_V0 0
+#define KGSL_IOMMU_CTX_OFFSET_V1 0x8000
#define KGSL_IOMMU_CTX_SHIFT 12
+/* TLBLKCR fields */
+#define KGSL_IOMMU_TLBLKCR_LKE_MASK 0x00000001
+#define KGSL_IOMMU_TLBLKCR_LKE_SHIFT 0
+#define KGSL_IOMMU_TLBLKCR_TLBIALLCFG_MASK 0x00000001
+#define KGSL_IOMMU_TLBLKCR_TLBIALLCFG_SHIFT 1
+#define KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_MASK 0x00000001
+#define KGSL_IOMMU_TLBLKCR_TLBIASIDCFG_SHIFT 2
+#define KGSL_IOMMU_TLBLKCR_TLBIVAACFG_MASK 0x00000001
+#define KGSL_IOMMU_TLBLKCR_TLBIVAACFG_SHIFT 3
+#define KGSL_IOMMU_TLBLKCR_FLOOR_MASK 0x000000FF
+#define KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT 8
+#define KGSL_IOMMU_TLBLKCR_VICTIM_MASK 0x000000FF
+#define KGSL_IOMMU_TLBLKCR_VICTIM_SHIFT 16
+
+/* V2PXX fields */
+#define KGSL_IOMMU_V2PXX_INDEX_MASK 0x000000FF
+#define KGSL_IOMMU_V2PXX_INDEX_SHIFT 0
+#define KGSL_IOMMU_V2PXX_VA_MASK 0x000FFFFF
+#define KGSL_IOMMU_V2PXX_VA_SHIFT 12
+
+/* FSYNR1 V0 fields */
+#define KGSL_IOMMU_FSYNR1_AWRITE_MASK 0x00000001
+#define KGSL_IOMMU_FSYNR1_AWRITE_SHIFT 8
+/* FSYNR0 V1 fields */
+#define KGSL_IOMMU_V1_FSYNR0_WNR_MASK 0x00000001
+#define KGSL_IOMMU_V1_FSYNR0_WNR_SHIFT 4
+
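The *_MASK values above are field-width masks applied before the shift (FLOOR, for example, is an 8-bit field starting at bit 8). A hypothetical helper, not from this patch, showing the read-modify-write pattern these defines support:

static unsigned int set_field(unsigned int reg, unsigned int mask,
			      unsigned int shift, unsigned int val)
{
	reg &= ~(mask << shift);	/* clear the field */
	reg |= (val & mask) << shift;	/* insert the new value */
	return reg;
}

/* e.g. reg = set_field(reg, KGSL_IOMMU_TLBLKCR_FLOOR_MASK,
 *                      KGSL_IOMMU_TLBLKCR_FLOOR_SHIFT, floor);
 */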
enum kgsl_iommu_reg_map {
KGSL_IOMMU_GLOBAL_BASE = 0,
KGSL_IOMMU_CTX_TTBR0,
@@ -26,6 +53,10 @@
KGSL_IOMMU_CTX_FSR,
KGSL_IOMMU_CTX_TLBIALL,
KGSL_IOMMU_CTX_RESUME,
+ KGSL_IOMMU_CTX_TLBLKCR,
+ KGSL_IOMMU_CTX_V2PUR,
+ KGSL_IOMMU_CTX_FSYNR0,
+ KGSL_IOMMU_CTX_FSYNR1,
KGSL_IOMMU_REG_MAX
};
@@ -124,6 +155,8 @@
* IOMMU registers
* @sync_lock_desc: GPU Memory descriptor for the memory containing the
* spinlocks
+ * @sync_lock_offset: The offset within a page at which the sync
+ * variables are located
* @sync_lock_initialized: True if the sync_lock feature is enabled
*/
struct kgsl_iommu {
@@ -136,6 +169,7 @@
struct kgsl_iommu_register_list *iommu_reg_list;
struct remote_iommu_petersons_spinlock *sync_lock_vars;
struct kgsl_memdesc sync_lock_desc;
+ unsigned int sync_lock_offset;
bool sync_lock_initialized;
};
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
index 83d14f7..a7832e4 100644
--- a/drivers/gpu/msm/kgsl_log.h
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2008-2011,2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index d1f58c4..4e95373 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -23,13 +23,11 @@
#include "kgsl.h"
#include "kgsl_mmu.h"
+#include "kgsl_gpummu.h"
#include "kgsl_device.h"
#include "kgsl_sharedmem.h"
#include "adreno.h"
-#define KGSL_MMU_ALIGN_SHIFT 13
-#define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
-
static enum kgsl_mmutype kgsl_mmu_type;
static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable);
@@ -37,17 +35,18 @@
static int kgsl_cleanup_pt(struct kgsl_pagetable *pt)
{
int i;
- /* For IOMMU only unmap the global structures to global pt */
- if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
- (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
- (KGSL_MMU_GLOBAL_PT != pt->name) &&
- (KGSL_MMU_PRIV_BANK_TABLE_NAME != pt->name))
- return 0;
+ struct kgsl_device *device;
+
for (i = 0; i < KGSL_DEVICE_MAX; i++) {
- struct kgsl_device *device = kgsl_driver.devp[i];
+ device = kgsl_driver.devp[i];
if (device)
device->ftbl->cleanup_pt(device, pt);
}
+ /* Only the 3d device needs mmu-specific pt entries */
+ device = kgsl_driver.devp[KGSL_DEVICE_3D0];
+ if (device->mmu.mmu_ops->mmu_cleanup_pt != NULL)
+ device->mmu.mmu_ops->mmu_cleanup_pt(&device->mmu, pt);
+
return 0;
}
@@ -56,21 +55,25 @@
{
int i = 0;
int status = 0;
+ struct kgsl_device *device;
- /* For IOMMU only map the global structures to global pt */
- if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
- (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
- (KGSL_MMU_GLOBAL_PT != pt->name) &&
- (KGSL_MMU_PRIV_BANK_TABLE_NAME != pt->name))
- return 0;
for (i = 0; i < KGSL_DEVICE_MAX; i++) {
- struct kgsl_device *device = kgsl_driver.devp[i];
+ device = kgsl_driver.devp[i];
if (device) {
status = device->ftbl->setup_pt(device, pt);
if (status)
goto error_pt;
}
}
+ /* Only the 3d device needs mmu-specific pt entries */
+ device = kgsl_driver.devp[KGSL_DEVICE_3D0];
+ if (device->mmu.mmu_ops->mmu_setup_pt != NULL) {
+ status = device->mmu.mmu_ops->mmu_setup_pt(&device->mmu, pt);
+ if (status) {
+ i = KGSL_DEVICE_MAX - 1;
+ goto error_pt;
+ }
+ }
return status;
error_pt:
while (i >= 0) {
@@ -101,7 +104,7 @@
if (pagetable->pool)
gen_pool_destroy(pagetable->pool);
- pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv);
+ pagetable->pt_ops->mmu_destroy_pagetable(pagetable);
kfree(pagetable);
}
@@ -191,7 +194,7 @@
if (pt) {
ret += snprintf(buf, PAGE_SIZE, "0x%x\n",
- kgsl_mmu_get_ptsize());
+ kgsl_mmu_get_ptsize(pt->mmu));
}
kgsl_put_pagetable(pt);
@@ -310,22 +313,6 @@
return ret;
}
-unsigned int kgsl_mmu_get_ptsize(void)
-{
- /*
- * For IOMMU, we could do up to 4G virtual range if we wanted to, but
- * it makes more sense to return a smaller range and leave the rest of
- * the virtual range for future improvements
- */
-
- if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type)
- return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
- else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type)
- return SZ_2G - KGSL_PAGETABLE_BASE;
- else
- return 0;
-}
-
int
kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, unsigned int pt_base)
{
@@ -355,15 +342,15 @@
unsigned int ret = 0;
if (!mmu->mmu_ops || !mmu->mmu_ops->mmu_pt_equal)
- return KGSL_MMU_GLOBAL_PT;
+ return 0;
spin_lock(&kgsl_driver.ptlock);
list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
- if ((addr & (PAGE_SIZE-1)) == pt->fault_addr) {
+ if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) {
ret = 1;
break;
} else {
- pt->fault_addr = (addr & (PAGE_SIZE-1));
+ pt->fault_addr = (addr & ~(PAGE_SIZE-1));
ret = 0;
break;
}
@@ -458,7 +445,8 @@
}
EXPORT_SYMBOL(kgsl_mh_intrcallback);
-static struct kgsl_pagetable *kgsl_mmu_createpagetableobject(
+static struct kgsl_pagetable *
+kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu,
unsigned int name)
{
int status = 0;
@@ -477,8 +465,8 @@
spin_lock_init(&pagetable->lock);
- ptsize = kgsl_mmu_get_ptsize();
-
+ ptsize = kgsl_mmu_get_ptsize(mmu);
+ pagetable->mmu = mmu;
pagetable->name = name;
pagetable->max_entries = KGSL_PAGETABLE_ENTRIES(ptsize);
pagetable->fault_addr = 0xFFFFFFFF;
@@ -490,10 +478,10 @@
if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) &&
((KGSL_MMU_GLOBAL_PT == name) ||
(KGSL_MMU_PRIV_BANK_TABLE_NAME == name))) {
- pagetable->kgsl_pool = gen_pool_create(PAGE_SHIFT, -1);
+ pagetable->kgsl_pool = gen_pool_create(ilog2(SZ_8K), -1);
if (pagetable->kgsl_pool == NULL) {
KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
- KGSL_MMU_ALIGN_SHIFT);
+ ilog2(SZ_8K));
goto err_alloc;
}
if (gen_pool_add(pagetable->kgsl_pool,
@@ -504,14 +492,14 @@
}
}
- pagetable->pool = gen_pool_create(KGSL_MMU_ALIGN_SHIFT, -1);
+ pagetable->pool = gen_pool_create(PAGE_SHIFT, -1);
if (pagetable->pool == NULL) {
KGSL_CORE_ERR("gen_pool_create(%d) failed\n",
- KGSL_MMU_ALIGN_SHIFT);
+ PAGE_SHIFT);
goto err_kgsl_pool;
}
- if (gen_pool_add(pagetable->pool, KGSL_PAGETABLE_BASE,
+ if (gen_pool_add(pagetable->pool, kgsl_mmu_get_base_addr(mmu),
ptsize, -1)) {
KGSL_CORE_ERR("gen_pool_add failed\n");
goto err_pool;
@@ -540,7 +528,7 @@
return pagetable;
err_mmu_create:
- pagetable->pt_ops->mmu_destroy_pagetable(pagetable->priv);
+ pagetable->pt_ops->mmu_destroy_pagetable(pagetable);
err_pool:
gen_pool_destroy(pagetable->pool);
err_kgsl_pool:
@@ -552,24 +540,21 @@
return NULL;
}
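kgsl_mmu_createpagetableobject() manages GPU virtual address space with the kernel's gen_pool allocator, as seen above. A minimal sketch of that create/add/alloc/free lifecycle using the standard <linux/genalloc.h> API; the base address and sizes here are made-up values:

#include <linux/genalloc.h>
#include <linux/sizes.h>

static unsigned long demo_pool(void)
{
	struct gen_pool *pool;
	unsigned long va;

	pool = gen_pool_create(PAGE_SHIFT, -1);	/* page-granular pool */
	if (pool == NULL)
		return 0;
	/* hand the pool a region of GPU virtual address space */
	if (gen_pool_add(pool, 0x10000000, SZ_256M, -1)) {
		gen_pool_destroy(pool);
		return 0;
	}
	va = gen_pool_alloc(pool, SZ_64K);	/* carve out 64K */
	if (va)
		gen_pool_free(pool, va, SZ_64K);
	gen_pool_destroy(pool);
	return va;
}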
-struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name)
+struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu,
+ unsigned long name)
{
struct kgsl_pagetable *pt;
if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
return (void *)(-1);
-#ifndef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
- name = KGSL_MMU_GLOBAL_PT;
-#endif
- /* We presently do not support per-process for IOMMU-v2 */
- if (!msm_soc_version_supports_iommu_v1())
+ if (!kgsl_mmu_is_perprocess(mmu))
name = KGSL_MMU_GLOBAL_PT;
pt = kgsl_get_pagetable(name);
if (pt == NULL)
- pt = kgsl_mmu_createpagetableobject(name);
+ pt = kgsl_mmu_createpagetableobject(mmu, name);
return pt;
}
@@ -626,24 +611,15 @@
*/
}
-static inline struct gen_pool *
-_get_pool(struct kgsl_pagetable *pagetable, unsigned int flags)
-{
- if (pagetable->kgsl_pool &&
- (KGSL_MEMDESC_GLOBAL & flags))
- return pagetable->kgsl_pool;
- return pagetable->pool;
-}
-
int
kgsl_mmu_map(struct kgsl_pagetable *pagetable,
- struct kgsl_memdesc *memdesc,
- unsigned int protflags)
+ struct kgsl_memdesc *memdesc)
{
int ret;
- struct gen_pool *pool;
+ struct gen_pool *pool = NULL;
int size;
int page_align = ilog2(PAGE_SIZE);
+ unsigned int protflags = kgsl_memdesc_protflags(memdesc);
if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) {
if (memdesc->sglen == 1) {
@@ -665,33 +641,57 @@
size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
- /* Allocate from kgsl pool if it exists for global mappings */
- pool = _get_pool(pagetable, memdesc->priv);
+ pool = pagetable->pool;
- /* Allocate aligned virtual addresses for iommu. This allows
- * more efficient pagetable entries if the physical memory
- * is also aligned. Don't do this for GPUMMU, because
- * the address space is so small.
- */
- if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype() &&
- kgsl_memdesc_get_align(memdesc) > 0)
- page_align = kgsl_memdesc_get_align(memdesc);
-
- memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size, page_align);
- if (memdesc->gpuaddr == 0) {
- KGSL_CORE_ERR("gen_pool_alloc(%d) failed from pool: %s\n",
- size,
- (pool == pagetable->kgsl_pool) ?
- "kgsl_pool" : "general_pool");
- KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
- pagetable->name, pagetable->stats.mapped,
- pagetable->stats.entries);
- return -ENOMEM;
+ if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+ /* Allocate aligned virtual addresses for iommu. This allows
+ * more efficient pagetable entries if the physical memory
+ * is also aligned. Don't do this for GPUMMU, because
+ * the address space is so small.
+ */
+ if (kgsl_memdesc_get_align(memdesc) > 0)
+ page_align = kgsl_memdesc_get_align(memdesc);
+ if (kgsl_memdesc_is_global(memdesc)) {
+ /*
+ * Only the default pagetable has a kgsl_pool, and
+ * it is responsible for creating the mapping for
+ * each global buffer. The mapping will be reused
+ * in all other pagetables and it must already exist
+ * when we're creating other pagetables which do not
+ * have a kgsl_pool.
+ */
+ pool = pagetable->kgsl_pool;
+ if (pool == NULL && memdesc->gpuaddr == 0) {
+ KGSL_CORE_ERR(
+ "No address for global mapping into pt %d\n",
+ pagetable->name);
+ return -EINVAL;
+ }
+ } else if (kgsl_memdesc_use_cpu_map(memdesc)) {
+ if (memdesc->gpuaddr == 0)
+ return -EINVAL;
+ pool = NULL;
+ }
+ }
+ if (pool) {
+ memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size,
+ page_align);
+ if (memdesc->gpuaddr == 0) {
+ KGSL_CORE_ERR("gen_pool_alloc(%d) failed, pool: %s\n",
+ size,
+ (pool == pagetable->kgsl_pool) ?
+ "kgsl_pool" : "general_pool");
+ KGSL_CORE_ERR(" [%d] allocated=%d, entries=%d\n",
+ pagetable->name,
+ pagetable->stats.mapped,
+ pagetable->stats.entries);
+ return -ENOMEM;
+ }
}
if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
spin_lock(&pagetable->lock);
- ret = pagetable->pt_ops->mmu_map(pagetable->priv, memdesc, protflags,
+ ret = pagetable->pt_ops->mmu_map(pagetable, memdesc, protflags,
&pagetable->tlb_flags);
if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
spin_lock(&pagetable->lock);
@@ -713,7 +713,8 @@
err_free_gpuaddr:
spin_unlock(&pagetable->lock);
- gen_pool_free(pool, memdesc->gpuaddr, size);
+ if (pool)
+ gen_pool_free(pool, memdesc->gpuaddr, size);
memdesc->gpuaddr = 0;
return ret;
}
@@ -743,7 +744,7 @@
if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
spin_lock(&pagetable->lock);
- pagetable->pt_ops->mmu_unmap(pagetable->priv, memdesc,
+ pagetable->pt_ops->mmu_unmap(pagetable, memdesc,
&pagetable->tlb_flags);
/* If buffer is unmapped 0 fault addr */
@@ -759,21 +760,29 @@
spin_unlock(&pagetable->lock);
- pool = _get_pool(pagetable, memdesc->priv);
- gen_pool_free(pool, memdesc->gpuaddr, size);
+ pool = pagetable->pool;
+
+ if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
+ if (kgsl_memdesc_is_global(memdesc))
+ pool = pagetable->kgsl_pool;
+ else if (kgsl_memdesc_use_cpu_map(memdesc))
+ pool = NULL;
+ }
+ if (pool)
+ gen_pool_free(pool, memdesc->gpuaddr, size);
/*
* Don't clear the gpuaddr on global mappings because they
* may be in use by other pagetables
*/
- if (!(memdesc->priv & KGSL_MEMDESC_GLOBAL))
+ if (!kgsl_memdesc_is_global(memdesc))
memdesc->gpuaddr = 0;
return 0;
}
EXPORT_SYMBOL(kgsl_mmu_unmap);
int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable,
- struct kgsl_memdesc *memdesc, unsigned int protflags)
+ struct kgsl_memdesc *memdesc)
{
int result = -EINVAL;
unsigned int gpuaddr = 0;
@@ -785,19 +794,17 @@
/* Not all global mappings are needed for all MMU types */
if (!memdesc->size)
return 0;
-
gpuaddr = memdesc->gpuaddr;
memdesc->priv |= KGSL_MEMDESC_GLOBAL;
- result = kgsl_mmu_map(pagetable, memdesc, protflags);
+ result = kgsl_mmu_map(pagetable, memdesc);
if (result)
goto error;
/*global mappings must have the same gpu address in all pagetables*/
if (gpuaddr && gpuaddr != memdesc->gpuaddr) {
- KGSL_CORE_ERR("pt %p addr mismatch phys 0x%08x"
- "gpu 0x%0x 0x%08x", pagetable, memdesc->physaddr,
- gpuaddr, memdesc->gpuaddr);
+ KGSL_CORE_ERR("pt %p addr mismatch phys %pa gpu 0x%0x 0x%08x",
+ pagetable, &memdesc->physaddr, gpuaddr, memdesc->gpuaddr);
goto error_unmap;
}
return result;
@@ -890,12 +897,18 @@
}
EXPORT_SYMBOL(kgsl_mmu_set_mmutype);
-int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr)
+int kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, unsigned int gpuaddr)
{
if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
return 1;
- return ((gpuaddr >= KGSL_PAGETABLE_BASE) &&
- (gpuaddr < (KGSL_PAGETABLE_BASE + kgsl_mmu_get_ptsize())));
+ if (gpuaddr >= kgsl_mmu_get_base_addr(pt->mmu) &&
+ gpuaddr < kgsl_mmu_get_base_addr(pt->mmu) +
+ kgsl_mmu_get_ptsize(pt->mmu))
+ return 1;
+ if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU
+ && kgsl_mmu_is_perprocess(pt->mmu))
+ return (gpuaddr > 0 && gpuaddr < TASK_SIZE);
+ return 0;
}
EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range);
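The rewritten range check accepts two kinds of addresses: anything inside the pagetable's own allocation window, and, for per-process IOMMU pagetables, any user virtual address, since those mappings mirror the CPU address space. A condensed sketch of the logic with the mmu accessors inlined as plain parameters (hypothetical helper, for illustration):

static int demo_gpuaddr_in_range(unsigned int addr, unsigned int base,
				 unsigned int size, int perprocess)
{
	if (addr >= base && addr < base + size)
		return 1;
	/* per-process pagetables mirror the CPU's user VA range */
	return perprocess && addr > 0 && addr < TASK_SIZE;
}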
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index 377f342..d7d9516 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,16 +13,23 @@
#ifndef __KGSL_MMU_H
#define __KGSL_MMU_H
-/*
- * These defines control the split between ttbr1 and ttbr0 pagetables of IOMMU
- * and what ranges of memory we map to them
- */
-#define KGSL_IOMMU_GLOBAL_MEM_BASE 0xC0000000
-#define KGSL_IOMMU_GLOBAL_MEM_SIZE SZ_4M
-#define KGSL_IOMMU_TTBR1_SPLIT 2
+#include <mach/iommu.h>
-#define KGSL_MMU_ALIGN_SHIFT 13
-#define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
+/*
+ * These defines control the address range for allocations that
+ * are mapped into all pagetables.
+ */
+#define KGSL_IOMMU_GLOBAL_MEM_BASE 0xf8000000
+#define KGSL_IOMMU_GLOBAL_MEM_SIZE SZ_4M
+
+#define KGSL_MMU_ALIGN_MASK (~((1 << PAGE_SHIFT) - 1))
+
+/* defconfig option for disabling per process pagetables */
+#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
+#define KGSL_MMU_USE_PER_PROCESS_PT true
+#else
+#define KGSL_MMU_USE_PER_PROCESS_PT false
+#endif
/* Identifier for the global page table */
/* Per process page tables will probably pass in the thread group
@@ -116,6 +123,7 @@
unsigned int tlb_flags;
unsigned int fault_addr;
void *priv;
+ struct kgsl_mmu *mmu;
};
struct kgsl_mmu;
@@ -149,24 +157,26 @@
unsigned int (*mmu_get_pt_base_addr)
(struct kgsl_mmu *mmu,
struct kgsl_pagetable *pt);
+ int (*mmu_setup_pt) (struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt);
+ void (*mmu_cleanup_pt) (struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt);
unsigned int (*mmu_sync_lock)
- (struct kgsl_mmu *mmu,
- unsigned int *cmds);
+ (struct kgsl_mmu *mmu, unsigned int *cmds);
unsigned int (*mmu_sync_unlock)
- (struct kgsl_mmu *mmu,
- unsigned int *cmds);
+ (struct kgsl_mmu *mmu, unsigned int *cmds);
};
struct kgsl_mmu_pt_ops {
- int (*mmu_map) (void *mmu_pt,
+ int (*mmu_map) (struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int protflags,
unsigned int *tlb_flags);
- int (*mmu_unmap) (void *mmu_pt,
+ int (*mmu_unmap) (struct kgsl_pagetable *pt,
struct kgsl_memdesc *memdesc,
unsigned int *tlb_flags);
void *(*mmu_create_pagetable) (void);
- void (*mmu_destroy_pagetable) (void *pt);
+ void (*mmu_destroy_pagetable) (struct kgsl_pagetable *);
};
#define KGSL_MMU_FLAGS_IOMMU_SYNC BIT(31)
@@ -185,14 +195,19 @@
const struct kgsl_mmu_ops *mmu_ops;
void *priv;
int fault;
+ unsigned long pt_base;
+ unsigned long pt_size;
+ bool pt_per_process;
+ bool use_cpu_map;
};
-#include "kgsl_gpummu.h"
-
extern struct kgsl_mmu_ops iommu_ops;
extern struct kgsl_mmu_pt_ops iommu_pt_ops;
+extern struct kgsl_mmu_ops gpummu_ops;
+extern struct kgsl_mmu_pt_ops gpummu_pt_ops;
-struct kgsl_pagetable *kgsl_mmu_getpagetable(unsigned long name);
+struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *,
+ unsigned long name);
void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable);
void kgsl_mh_start(struct kgsl_device *device);
void kgsl_mh_intrcallback(struct kgsl_device *device);
@@ -200,10 +215,9 @@
int kgsl_mmu_start(struct kgsl_device *device);
int kgsl_mmu_close(struct kgsl_device *device);
int kgsl_mmu_map(struct kgsl_pagetable *pagetable,
- struct kgsl_memdesc *memdesc,
- unsigned int protflags);
+ struct kgsl_memdesc *memdesc);
int kgsl_mmu_map_global(struct kgsl_pagetable *pagetable,
- struct kgsl_memdesc *memdesc, unsigned int protflags);
+ struct kgsl_memdesc *memdesc);
int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable,
struct kgsl_memdesc *memdesc);
unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
@@ -220,8 +234,7 @@
int kgsl_mmu_enabled(void);
void kgsl_mmu_set_mmutype(char *mmutype);
enum kgsl_mmutype kgsl_mmu_get_mmutype(void);
-unsigned int kgsl_mmu_get_ptsize(void);
-int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr);
+int kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, unsigned int gpuaddr);
/*
* Static inline functions of MMU that simply call the SMMU specific
@@ -332,6 +345,56 @@
return 0;
}
+/*
+ * kgsl_mmu_is_perprocess() - Runtime check for per-process
+ * pagetables.
+ * @mmu: the mmu
+ *
+ * Returns true if per-process pagetables are enabled,
+ * false if not.
+ */
+static inline int kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu)
+{
+ return mmu->pt_per_process;
+}
+
+/*
+ * kgsl_mmu_use_cpu_map() - Runtime check for matching the CPU
+ * address space on the GPU.
+ * @mmu: the mmu
+ *
+ * Returns true if supported, false if not.
+ */
+static inline int kgsl_mmu_use_cpu_map(struct kgsl_mmu *mmu)
+{
+ return mmu->pt_per_process;
+}
+
+/*
+ * kgsl_mmu_get_base_addr() - Get gpu virtual address base.
+ * @mmu: the mmu
+ *
+ * Returns the start address of the allocatable gpu
+ * virtual address space. Other mappings that mirror
+ * the CPU address space are possible outside this range.
+ */
+static inline unsigned int kgsl_mmu_get_base_addr(struct kgsl_mmu *mmu)
+{
+ return mmu->pt_base;
+}
+
+/*
+ * kgsl_mmu_get_ptsize() - Get gpu pagetable size
+ * @mmu: the mmu
+ *
+ * Returns the usable size of the gpu allocatable
+ * address space.
+ */
+static inline unsigned int kgsl_mmu_get_ptsize(struct kgsl_mmu *mmu)
+{
+ return mmu->pt_size;
+}
+
static inline int kgsl_mmu_sync_lock(struct kgsl_mmu *mmu,
unsigned int *cmds)
{
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index d489119..2f8d93e 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -23,13 +23,13 @@
#include "kgsl_pwrscale.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
+#include "kgsl_sharedmem.h"
#define KGSL_PWRFLAGS_POWER_ON 0
#define KGSL_PWRFLAGS_CLK_ON 1
#define KGSL_PWRFLAGS_AXI_ON 2
#define KGSL_PWRFLAGS_IRQ_ON 3
-#define GPU_SWFI_LATENCY 3
#define UPDATE_BUSY_VAL 1000000
#define UPDATE_BUSY 50
@@ -130,6 +130,16 @@
*/
pwr->active_pwrlevel = new_level;
+ pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
+
+ if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
+
+ if (pwr->pcl)
+ msm_bus_scale_client_update_request(pwr->pcl,
+ pwrlevel->bus_freq);
+ else if (pwr->ebi1_clk)
+ clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
+ }
if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) ||
(device->state == KGSL_STATE_NAP)) {
@@ -156,16 +166,6 @@
}
}
- pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
-
- if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
-
- if (pwr->pcl)
- msm_bus_scale_client_update_request(pwr->pcl,
- pwrlevel->bus_freq);
- else if (pwr->ebi1_clk)
- clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
- }
trace_kgsl_pwrlevel(device, pwr->active_pwrlevel, pwrlevel->gpu_freq);
}
@@ -357,13 +357,13 @@
return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1);
}
-/* Given a GPU clock value, return the nearest powerlevel */
+/* Given a GPU clock value, return the lowest matching powerlevel */
static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock)
{
int i;
- for (i = 0; i < pwr->num_pwrlevels - 1; i++) {
+ for (i = pwr->num_pwrlevels - 1; i >= 0; i--) {
if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000)
return i;
}
@@ -515,7 +515,6 @@
struct kgsl_device *device = kgsl_device_from_dev(dev);
struct kgsl_pwrctrl *pwr;
const long div = 1000/HZ;
- static unsigned int org_interval_timeout = 1;
int rc;
if (device == NULL)
@@ -528,15 +527,11 @@
if (rc)
return rc;
- if (org_interval_timeout == 1)
- org_interval_timeout = pwr->interval_timeout;
-
mutex_lock(&device->mutex);
/* Let the timeout be requested in ms, but convert to jiffies. */
val /= div;
- if (val >= org_interval_timeout)
- pwr->interval_timeout = val;
+ pwr->interval_timeout = val;
mutex_unlock(&device->mutex);
@@ -548,10 +543,48 @@
char *buf)
{
struct kgsl_device *device = kgsl_device_from_dev(dev);
+ int mul = 1000/HZ;
+ if (device == NULL)
+ return 0;
+ /* Show the idle_timeout converted to msec */
+ return snprintf(buf, PAGE_SIZE, "%d\n",
+ device->pwrctrl.interval_timeout * mul);
+}
+
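Worked example of the conversion above, assuming HZ = 100: both div and mul come out to 1000/HZ = 10, so writing "80" (ms) stores 80/10 = 8 jiffies, and the show path prints 8 * 10 = 80 ms back.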
+static int kgsl_pwrctrl_pmqos_latency_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ char temp[20];
+ unsigned long val;
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ int rc;
+
+ if (device == NULL)
+ return 0;
+
+ snprintf(temp, sizeof(temp), "%.*s",
+ (int)min(count, sizeof(temp) - 1), buf);
+ rc = kstrtoul(temp, 0, &val);
+ if (rc)
+ return rc;
+
+ mutex_lock(&device->mutex);
+ device->pwrctrl.pm_qos_latency = val;
+ mutex_unlock(&device->mutex);
+
+ return count;
+}
+
+static int kgsl_pwrctrl_pmqos_latency_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
if (device == NULL)
return 0;
return snprintf(buf, PAGE_SIZE, "%d\n",
- device->pwrctrl.interval_timeout);
+ device->pwrctrl.pm_qos_latency);
}
static int kgsl_pwrctrl_gpubusy_show(struct device *dev,
@@ -615,6 +648,14 @@
return num_chars;
}
+static int kgsl_pwrctrl_reset_count_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter);
+}
+
DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, kgsl_pwrctrl_gpuclk_store);
DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show,
kgsl_pwrctrl_max_gpuclk_store);
@@ -640,6 +681,12 @@
DEVICE_ATTR(num_pwrlevels, 0444,
kgsl_pwrctrl_num_pwrlevels_show,
NULL);
+DEVICE_ATTR(pmqos_latency, 0644,
+ kgsl_pwrctrl_pmqos_latency_show,
+ kgsl_pwrctrl_pmqos_latency_store);
+DEVICE_ATTR(reset_count, 0444,
+ kgsl_pwrctrl_reset_count_show,
+ NULL);
static const struct device_attribute *pwrctrl_attr_list[] = {
&dev_attr_gpuclk,
@@ -653,6 +700,8 @@
&dev_attr_min_pwrlevel,
&dev_attr_thermal_pwrlevel,
&dev_attr_num_pwrlevels,
+ &dev_attr_pmqos_latency,
+ &dev_attr_reset_count,
NULL
};
@@ -684,6 +733,9 @@
clkstats->on_time_old = on_time;
clkstats->elapsed_old = clkstats->elapsed;
clkstats->elapsed = 0;
+
+ trace_kgsl_gpubusy(device, clkstats->on_time_old,
+ clkstats->elapsed_old);
}
/* Track the amount of time the gpu is on vs the total system time. *
@@ -714,23 +766,23 @@
/* High latency clock maintenance. */
if ((pwr->pwrlevels[0].gpu_freq > 0) &&
(requested_state != KGSL_STATE_NAP)) {
- clk_set_rate(pwr->grp_clks[0],
- pwr->pwrlevels[pwr->num_pwrlevels - 1].
- gpu_freq);
for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
if (pwr->grp_clks[i])
clk_unprepare(pwr->grp_clks[i]);
+ clk_set_rate(pwr->grp_clks[0],
+ pwr->pwrlevels[pwr->num_pwrlevels - 1].
+ gpu_freq);
}
kgsl_pwrctrl_busy_time(device, true);
} else if (requested_state == KGSL_STATE_SLEEP) {
/* High latency clock maintenance. */
+ for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+ if (pwr->grp_clks[i])
+ clk_unprepare(pwr->grp_clks[i]);
if ((pwr->pwrlevels[0].gpu_freq > 0))
clk_set_rate(pwr->grp_clks[0],
pwr->pwrlevels[pwr->num_pwrlevels - 1].
gpu_freq);
- for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
- if (pwr->grp_clks[i])
- clk_unprepare(pwr->grp_clks[i]);
}
} else if (state == KGSL_PWRFLAGS_ON) {
if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON,
@@ -738,15 +790,14 @@
trace_kgsl_clk(device, state);
/* High latency clock maintenance. */
if (device->state != KGSL_STATE_NAP) {
- for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
- if (pwr->grp_clks[i])
- clk_prepare(pwr->grp_clks[i]);
-
if (pwr->pwrlevels[0].gpu_freq > 0)
clk_set_rate(pwr->grp_clks[0],
pwr->pwrlevels
[pwr->active_pwrlevel].
gpu_freq);
+ for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+ if (pwr->grp_clks[i])
+ clk_prepare(pwr->grp_clks[i]);
}
/* as the last step, enable grp_clk;
this lets the GPU interrupt come through */
@@ -878,7 +929,8 @@
if (pdata->set_grp_async != NULL)
pdata->set_grp_async();
- if (pdata->num_levels > KGSL_MAX_PWRLEVELS) {
+ if (pdata->num_levels > KGSL_MAX_PWRLEVELS ||
+ pdata->num_levels < 1) {
KGSL_PWR_ERR(device, "invalid power level count: %d\n",
pdata->num_levels);
result = -EINVAL;
@@ -947,6 +999,11 @@
}
}
+ /* Set the power level step multiplier with 1 as the default */
+ pwr->step_mul = pdata->step_mul ? pdata->step_mul : 1;
+
+ /* Set the CPU latency to 501 usec to allow low-latency PC modes */
+ pwr->pm_qos_latency = 501;
pm_runtime_enable(device->parentdev);
register_early_suspend(&device->display_off);
@@ -1143,7 +1200,7 @@
_sleep_accounting(device);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP);
- pm_qos_update_request(&device->pm_qos_req_dma,
+ pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
PM_QOS_DEFAULT_VALUE);
break;
case KGSL_STATE_SLEEP:
@@ -1177,7 +1234,7 @@
device->ftbl->stop(device);
_sleep_accounting(device);
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
- pm_qos_update_request(&device->pm_qos_req_dma,
+ pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
PM_QOS_DEFAULT_VALUE);
break;
case KGSL_STATE_SLUMBER:
@@ -1224,10 +1281,15 @@
void kgsl_pwrctrl_wake(struct kgsl_device *device)
{
int status;
+ unsigned int context_id;
+ unsigned int state = device->state;
+ unsigned int ts_processed = 0xdeaddead;
+ struct kgsl_context *context;
+
kgsl_pwrctrl_request_state(device, KGSL_STATE_ACTIVE);
switch (device->state) {
case KGSL_STATE_SLUMBER:
- status = device->ftbl->start(device, 0);
+ status = device->ftbl->start(device);
if (status) {
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
KGSL_DRV_ERR(device, "start failed %d\n", status);
@@ -1237,6 +1299,17 @@
case KGSL_STATE_SLEEP:
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
kgsl_pwrscale_wake(device);
+ kgsl_sharedmem_readl(&device->memstore,
+ (unsigned int *) &context_id,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ current_context));
+ context = idr_find(&device->context_idr, context_id);
+ if (context)
+ ts_processed = kgsl_readtimestamp(device, context,
+ KGSL_TIMESTAMP_RETIRED);
+ KGSL_PWR_INFO(device, "Wake from %s state. CTXT: %d RTRD TS: %08X\n",
+ kgsl_pwrstate_to_str(state),
+ context ? context->id : -1, ts_processed);
/* fall through */
case KGSL_STATE_NAP:
/* Turn on the core clocks */
@@ -1247,8 +1320,8 @@
/* Re-enable HW access */
mod_timer(&device->idle_timer,
jiffies + device->pwrctrl.interval_timeout);
- pm_qos_update_request(&device->pm_qos_req_dma,
- GPU_SWFI_LATENCY);
+ pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+ device->pwrctrl.pm_qos_latency);
case KGSL_STATE_ACTIVE:
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
break;
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
index 8d66505..ced52e1 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -60,6 +60,9 @@
* @irq_name - resource name for the IRQ
* @restore_slumber - Flag to indicate that we are in a suspend/restore sequence
* @clk_stats - structure of clock statistics
+ * @pm_qos_req_dma - the power management quality of service structure
+ * @pm_qos_latency - allowed CPU latency in microseconds
+ * @step_mul - multiplier for moving between power levels
*/
struct kgsl_pwrctrl {
@@ -85,6 +88,9 @@
s64 time;
unsigned int restore_slumber;
struct kgsl_clk_stats clk_stats;
+ struct pm_qos_request pm_qos_req_dma;
+ unsigned int pm_qos_latency;
+ unsigned int step_mul;
};
void kgsl_pwrctrl_irq(struct kgsl_device *device, int state);
diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c
index dffae70..02ada38 100644
--- a/drivers/gpu/msm/kgsl_pwrscale.c
+++ b/drivers/gpu/msm/kgsl_pwrscale.c
@@ -237,16 +237,14 @@
void kgsl_pwrscale_busy(struct kgsl_device *device)
{
if (PWRSCALE_ACTIVE(device) && device->pwrscale.policy->busy)
- if (device->requested_state != KGSL_STATE_SLUMBER)
- device->pwrscale.policy->busy(device,
- &device->pwrscale);
+ device->pwrscale.policy->busy(device,
+ &device->pwrscale);
}
void kgsl_pwrscale_idle(struct kgsl_device *device)
{
if (PWRSCALE_ACTIVE(device) && device->pwrscale.policy->idle)
- if (device->requested_state != KGSL_STATE_SLUMBER &&
- device->requested_state != KGSL_STATE_SLEEP)
+ if (device->state == KGSL_STATE_ACTIVE)
device->pwrscale.policy->idle(device,
&device->pwrscale);
}
diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
index aa6861e..9b2ac70 100644
--- a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
+++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2010-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -38,6 +38,10 @@
* per frame for 60fps content.
*/
#define FLOOR 5000
+/* CEILING is 50 msec, larger than any standard
+ * frame length, but less than the idle timer.
+ */
+#define CEILING 50000
#define SWITCH_OFF 200
#define SWITCH_OFF_RESET_TH 40
#define SKIP_COUNTER 500
@@ -163,11 +167,24 @@
priv->no_switch_cnt = 0;
}
- idle = priv->bin.total_time - priv->bin.busy_time;
+ /* If there is an extended block of busy processing,
+ * increase frequency. Otherwise run the normal algorithm.
+ */
+ if (priv->bin.busy_time > CEILING) {
+ val = -1;
+ } else {
+ idle = priv->bin.total_time - priv->bin.busy_time;
+ idle = (idle > 0) ? idle : 0;
+ val = __secure_tz_entry(TZ_UPDATE_ID, idle, device->id);
+ }
priv->bin.total_time = 0;
priv->bin.busy_time = 0;
- idle = (idle > 0) ? idle : 0;
- val = __secure_tz_entry(TZ_UPDATE_ID, idle, device->id);
+
+ /* If the decision is to move to a lower level, make sure the GPU
+ * frequency drops.
+ */
+ if (val > 0)
+ val *= pwr->step_mul;
if (val)
kgsl_pwrctrl_pwrlevel_change(device,
pwr->active_pwrlevel + val);
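Worked example of the revised policy: a sample with 60 ms of busy time exceeds CEILING (50 ms), so val is forced to -1 and the GPU moves one level faster. Otherwise the TZ algorithm decides; a positive (slow-down) verdict is multiplied by step_mul, so with step_mul = 2 a TZ result of +1 drops two power levels at once, guaranteeing the frequency actually falls.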
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index a345e58..595f78f 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -154,9 +154,7 @@
static struct mem_entry_stats mem_stats[] = {
MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel),
-#ifdef CONFIG_ANDROID_PMEM
MEM_ENTRY_STAT(KGSL_MEM_ENTRY_PMEM, pmem),
-#endif
#ifdef CONFIG_ASHMEM
MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ASHMEM, ashmem),
#endif
@@ -511,21 +509,29 @@
void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op)
{
- void *addr = memdesc->hostptr;
+ /*
+ * If the buffer is mapped in the kernel, operate on that address;
+ * otherwise use the user address
+ */
+
+ void *addr = (memdesc->hostptr) ?
+ memdesc->hostptr : (void *) memdesc->useraddr;
+
int size = memdesc->size;
- switch (op) {
- case KGSL_CACHE_OP_FLUSH:
- dmac_flush_range(addr, addr + size);
- break;
- case KGSL_CACHE_OP_CLEAN:
- dmac_clean_range(addr, addr + size);
- break;
- case KGSL_CACHE_OP_INV:
- dmac_inv_range(addr, addr + size);
- break;
+ if (addr != NULL) {
+ switch (op) {
+ case KGSL_CACHE_OP_FLUSH:
+ dmac_flush_range(addr, addr + size);
+ break;
+ case KGSL_CACHE_OP_CLEAN:
+ dmac_clean_range(addr, addr + size);
+ break;
+ case KGSL_CACHE_OP_INV:
+ dmac_inv_range(addr, addr + size);
+ break;
+ }
}
-
outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, op);
}
EXPORT_SYMBOL(kgsl_cache_range_op);
@@ -533,7 +539,7 @@
static int
_kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable,
- size_t size, unsigned int protflags)
+ size_t size)
{
int pcount = 0, order, ret = 0;
int j, len, page_size, sglen_alloc, sglen = 0;
@@ -541,13 +547,15 @@
pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
void *ptr;
unsigned int align;
+ int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
page_size = (align >= ilog2(SZ_64K) && size >= SZ_64K)
? SZ_64K : PAGE_SIZE;
/* update align flags for what we actually use */
- kgsl_memdesc_set_align(memdesc, ilog2(page_size));
+ if (page_size != PAGE_SIZE)
+ kgsl_memdesc_set_align(memdesc, ilog2(page_size));
/*
* There needs to be enough room in the sg structure to be able to
@@ -568,11 +576,10 @@
memdesc->pagetable = pagetable;
memdesc->ops = &kgsl_page_alloc_ops;
- memdesc->sg = kgsl_sg_alloc(sglen_alloc);
+ memdesc->sglen_alloc = sglen_alloc;
+ memdesc->sg = kgsl_sg_alloc(memdesc->sglen_alloc);
if (memdesc->sg == NULL) {
- KGSL_CORE_ERR("vmalloc(%d) failed\n",
- sglen_alloc * sizeof(struct scatterlist));
ret = -ENOMEM;
goto done;
}
@@ -584,26 +591,24 @@
* two pages; well within the acceptable limits for using kmalloc.
*/
- pages = kmalloc(sglen_alloc * sizeof(struct page *), GFP_KERNEL);
+ pages = kmalloc(memdesc->sglen_alloc * sizeof(struct page *),
+ GFP_KERNEL);
if (pages == NULL) {
- KGSL_CORE_ERR("kmalloc (%d) failed\n",
- sglen_alloc * sizeof(struct page *));
ret = -ENOMEM;
goto done;
}
kmemleak_not_leak(memdesc->sg);
- memdesc->sglen_alloc = sglen_alloc;
- sg_init_table(memdesc->sg, sglen_alloc);
+ sg_init_table(memdesc->sg, memdesc->sglen_alloc);
len = size;
while (len > 0) {
struct page *page;
unsigned int gfp_mask = GFP_KERNEL | __GFP_HIGHMEM |
- __GFP_NOWARN;
+ __GFP_NOWARN | __GFP_NORETRY;
int j;
/* don't waste space at the end of the allocation */
@@ -667,41 +672,42 @@
* zeroed and unmaped each individual page, and then we had to turn
* around and call flush_dcache_page() on that page to clear the caches.
* This was killing us for performance. Instead, we found it is much
- * faster to allocate the pages without GFP_ZERO, map the entire range,
- * memset it, flush the range and then unmap - this results in a factor
- * of 4 improvement for speed for large buffers. There is a small
- * increase in speed for small buffers, but only on the order of a few
- * microseconds at best. The only downside is that there needs to be
- * enough temporary space in vmalloc to accomodate the map. This
- * shouldn't be a problem, but if it happens, fall back to a much slower
- * path
+ * faster to allocate the pages without GFP_ZERO, map a chunk of the
+ * range ('step' pages), memset it, flush it and then unmap
+ * - this results in a factor of 4 speed improvement for large
+ * buffers. There is a small decrease in speed for small buffers,
+ * but only on the order of a few microseconds at best. The 'step'
+ * size is based on a guess at the amount of free vmalloc space, but
+ * will scale down if there's not enough free space.
*/
+ for (j = 0; j < pcount; j += step) {
+ step = min(step, pcount - j);
- ptr = vmap(pages, pcount, VM_IOREMAP, page_prot);
+ ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
- if (ptr != NULL) {
- memset(ptr, 0, memdesc->size);
- dmac_flush_range(ptr, ptr + memdesc->size);
- vunmap(ptr);
- } else {
- /* Very, very, very slow path */
+ if (ptr != NULL) {
+ memset(ptr, 0, step * PAGE_SIZE);
+ dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
+ vunmap(ptr);
+ } else {
+ int k;
+ /* Very, very, very slow path */
- for (j = 0; j < pcount; j++) {
- ptr = kmap_atomic(pages[j]);
- memset(ptr, 0, PAGE_SIZE);
- dmac_flush_range(ptr, ptr + PAGE_SIZE);
- kunmap_atomic(ptr);
+ for (k = j; k < j + step; k++) {
+ ptr = kmap_atomic(pages[k]);
+ memset(ptr, 0, PAGE_SIZE);
+ dmac_flush_range(ptr, ptr + PAGE_SIZE);
+ kunmap_atomic(ptr);
+ }
+ /* scale down the step size to avoid this path */
+ if (step > 1)
+ step >>= 1;
}
}
outer_cache_range_op_sg(memdesc->sg, memdesc->sglen,
KGSL_CACHE_OP_FLUSH);
- ret = kgsl_mmu_map(pagetable, memdesc, protflags);
-
- if (ret)
- goto done;
-
KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc,
kgsl_driver.stats.page_alloc_max);
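A quick sanity check on the 'step' sizing above: the initial step is one eighth of the vmalloc window in pages, so with (for example) a 256 MB window that is 8192 4 KB pages, i.e. 32 MB vmapped, zeroed and flushed per iteration; each vmap() failure halves the step, degrading gracefully toward the one-page slow path.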
@@ -728,8 +734,7 @@
size = ALIGN(size, PAGE_SIZE * 2);
- ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
if (!ret)
ret = kgsl_page_alloc_map_kernel(memdesc);
if (ret)
@@ -743,17 +748,7 @@
struct kgsl_pagetable *pagetable,
size_t size)
{
- unsigned int protflags;
-
- if (size == 0)
- return -EINVAL;
-
- protflags = GSL_PT_PAGE_RV;
- if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
- protflags |= GSL_PT_PAGE_WV;
-
- return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
- protflags);
+ return _kgsl_sharedmem_page_alloc(memdesc, pagetable, PAGE_ALIGN(size));
}
EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user);
@@ -831,12 +826,6 @@
if (result)
goto err;
- result = kgsl_mmu_map(pagetable, memdesc,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-
- if (result)
- goto err;
-
KGSL_STATS_ADD(size, kgsl_driver.stats.coherent,
kgsl_driver.stats.coherent_max);
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
index 3109ef2..279490f 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.h
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,6 +19,7 @@
#include "kgsl_mmu.h"
#include <linux/slab.h>
#include <linux/kmemleak.h>
+#include <linux/iommu.h>
#include "kgsl_log.h"
@@ -83,6 +84,18 @@
}
/*
+ * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc
+ * @memdesc: the memdesc
+ *
+ * Returns a KGSL_CACHEMODE* value.
+ */
+static inline int
+kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT;
+}
+
+/*
* kgsl_memdesc_set_align - Set alignment flags of a memdesc
* @memdesc - the memdesc
* @align - alignment requested, as a power of 2 exponent.
@@ -144,7 +157,7 @@
unsigned int physaddr, unsigned int size)
{
memdesc->sg = kgsl_sg_alloc(1);
- if (!memdesc->sg)
+ if (memdesc->sg == NULL)
return -ENOMEM;
kmemleak_not_leak(memdesc->sg);
@@ -157,14 +170,98 @@
return 0;
}
+/*
+ * kgsl_memdesc_is_global - is this a globally mapped buffer?
+ * @memdesc: the memdesc
+ *
+ * Returns nonzero if this is a global mapping, 0 otherwise
+ */
+static inline int kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->priv & KGSL_MEMDESC_GLOBAL) != 0;
+}
+
+/*
+ * kgsl_memdesc_has_guard_page - is the last page a guard page?
+ * @memdesc: the memdesc
+ *
+ * Returns nonzero if there is a guard page, 0 otherwise
+ */
+static inline int
+kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0;
+}
+
+/*
+ * kgsl_memdesc_protflags - get mmu protection flags
+ * @memdesc: the memdesc
+ * Returns a mask of GSL_PT_PAGE* or IOMMU* values based
+ * on the memdesc flags.
+ */
+static inline unsigned int
+kgsl_memdesc_protflags(const struct kgsl_memdesc *memdesc)
+{
+ unsigned int protflags = 0;
+ enum kgsl_mmutype mmutype = kgsl_mmu_get_mmutype();
+
+ if (mmutype == KGSL_MMU_TYPE_GPU) {
+ protflags = GSL_PT_PAGE_RV;
+ if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
+ protflags |= GSL_PT_PAGE_WV;
+ } else if (mmutype == KGSL_MMU_TYPE_IOMMU) {
+ protflags = IOMMU_READ;
+ if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
+ protflags |= IOMMU_WRITE;
+ }
+ return protflags;
+}
+
+/*
+ * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU?
+ * @memdesc: the memdesc
+ */
+static inline int
+kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0;
+}
+
+/*
+ * kgsl_memdesc_mmapsize - get the size of the mmap region
+ * @memdesc: the memdesc
+ *
+ * The entire memdesc must be mapped. Additionally, if the
+ * CPU mapping is going to be mirrored, there must be room
+ * for the guard page to be mapped so that the address spaces
+ * match up.
+ */
+static inline unsigned int
+kgsl_memdesc_mmapsize(const struct kgsl_memdesc *memdesc)
+{
+ unsigned int size = memdesc->size;
+ if (kgsl_memdesc_use_cpu_map(memdesc) &&
+ kgsl_memdesc_has_guard_page(memdesc))
+ size += SZ_4K;
+ return size;
+}
+
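Usage note for kgsl_memdesc_mmapsize(): a 1 MB allocation with KGSL_MEMFLAGS_USE_CPU_MAP set and a guard page reports 1 MB + 4 KB, so the CPU mapping reserves the same footprint the GPU mapping occupies and the two address spaces stay congruent.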
static inline int
kgsl_allocate(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable, size_t size)
{
+ int ret;
+ memdesc->priv |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
return kgsl_sharedmem_ebimem(memdesc, pagetable, size);
- memdesc->flags |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
- return kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
+
+ ret = kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
+ if (ret)
+ return ret;
+ ret = kgsl_mmu_map(pagetable, memdesc);
+ if (ret)
+ kgsl_sharedmem_free(memdesc);
+ return ret;
}
static inline int
@@ -174,6 +271,9 @@
{
int ret;
+ if (size == 0)
+ return -EINVAL;
+
memdesc->flags = flags;
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index a5aa42f..4c9c744 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -106,7 +106,12 @@
{
struct kgsl_snapshot_linux_context *header = _ctxtptr;
struct kgsl_context *context = ptr;
- struct kgsl_device *device = context->dev_priv->device;
+ struct kgsl_device *device;
+
+ if (context)
+ device = context->dev_priv->device;
+ else
+ device = (struct kgsl_device *)data;
header->id = id;
@@ -141,6 +146,9 @@
idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
+ /* Increment ctxtcount for the global memstore */
+ ctxtcount++;
+
size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context);
/* Make sure there is enough room for the data */
@@ -169,8 +177,9 @@
header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
header->busclk = kgsl_get_clkrate(pwr->ebi1_clk);
- /* Future proof for per-context timestamps */
- header->current_context = -1;
+ /* Save the last active context */
+ kgsl_sharedmem_readl(&device->memstore, &header->current_context,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
/* Get the current PT base */
header->ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
@@ -185,8 +194,10 @@
header->ctxtcount = ctxtcount;
- /* append information for each context */
_ctxtptr = snapshot + sizeof(*header);
+ /* append information for the global context */
+ snapshot_context_info(KGSL_MEMSTORE_GLOBAL, NULL, device);
+ /* append information for each context */
idr_for_each(&device->context_idr, snapshot_context_info, NULL);
/* Return the size of the data segment */
@@ -283,7 +294,7 @@
{
list_del(&obj->node);
- obj->entry->flags &= ~KGSL_MEM_ENTRY_FROZEN;
+ obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN;
kgsl_mem_entry_put(obj->entry);
kfree(obj);
@@ -375,8 +386,8 @@
/* If the buffer is already on the list, skip it */
list_for_each_entry(obj, &device->snapshot_obj_list, node) {
if (obj->gpuaddr == gpuaddr && obj->ptbase == ptbase) {
- /* If the size is different, use the new size */
- if (obj->size != size)
+ /* If the size is different, use the bigger size */
+ if (obj->size < size)
obj->size = size;
return 0;
@@ -416,10 +427,10 @@
* 0 so it doesn't get counted twice
*/
- if (entry->flags & KGSL_MEM_ENTRY_FROZEN)
+ if (entry->memdesc.priv & KGSL_MEMDESC_FROZEN)
return 0;
- entry->flags |= KGSL_MEM_ENTRY_FROZEN;
+ entry->memdesc.priv |= KGSL_MEMDESC_FROZEN;
return entry->memdesc.size;
}
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
index 327d18a..4db2815 100644
--- a/drivers/gpu/msm/kgsl_snapshot.h
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -52,6 +52,7 @@
#define KGSL_SNAPSHOT_SECTION_DEBUG 0x0901
#define KGSL_SNAPSHOT_SECTION_DEBUGBUS 0x0A01
#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT 0x0B01
+#define KGSL_SNAPSHOT_SECTION_MEMLIST 0x0E01
#define KGSL_SNAPSHOT_SECTION_END 0xFFFF
@@ -103,6 +104,17 @@
int count; /* Number of dwords in the dump */
} __packed;
+/* Replay or Memory list section, both sections have the same header */
+struct kgsl_snapshot_replay_mem_list {
+ /*
+ * Number of IBs to replay for replay section or
+ * number of memory list entries for mem list section
+ */
+ int num_entries;
+ /* Pagetable base to which the replay IBs or memory entries belong */
+ __u32 ptbase;
+} __packed;
+
/* Indirect buffer sub-section header */
struct kgsl_snapshot_ib {
__u32 gpuaddr; /* GPU address of the IB */
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
index d9ab081..0e3e046 100644
--- a/drivers/gpu/msm/kgsl_sync.c
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -69,7 +69,6 @@
struct kgsl_fence_event_priv {
struct kgsl_context *context;
- unsigned int timestamp;
};
/**
@@ -86,7 +85,7 @@
void *priv, u32 context_id, u32 timestamp)
{
struct kgsl_fence_event_priv *ev = priv;
- kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp);
+ kgsl_sync_timeline_signal(ev->context->timeline, timestamp);
kgsl_context_put(ev->context);
kfree(ev);
}
@@ -126,7 +125,6 @@
if (event == NULL)
return -ENOMEM;
event->context = context;
- event->timestamp = timestamp;
kgsl_context_get(context);
pt = kgsl_sync_pt_create(context->timeline, timestamp);
@@ -181,6 +179,7 @@
}
static const struct sync_timeline_ops kgsl_sync_timeline_ops = {
+ .driver_name = "kgsl-timeline",
.dup = kgsl_sync_pt_dup,
.has_signaled = kgsl_sync_pt_has_signaled,
.compare = kgsl_sync_pt_compare,
@@ -207,7 +206,7 @@
struct kgsl_sync_timeline *ktimeline =
(struct kgsl_sync_timeline *) timeline;
- if (timestamp_cmp(timestamp, ktimeline->last_timestamp > 0))
+ if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0)
ktimeline->last_timestamp = timestamp;
sync_timeline_signal(timeline);
}
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index c54445c..5f7ee3c 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -24,6 +24,8 @@
#include <linux/tracepoint.h>
#include "kgsl_device.h"
+#include "adreno_drawctxt.h"
+
struct kgsl_device;
struct kgsl_ringbuffer_issueibcmds;
struct kgsl_device_waittimestamp;
@@ -34,11 +36,16 @@
TRACE_EVENT(kgsl_issueibcmds,
TP_PROTO(struct kgsl_device *device,
- struct kgsl_ringbuffer_issueibcmds *cmd,
+ int drawctxt_id,
struct kgsl_ibdesc *ibdesc,
- int result),
+ int numibs,
+ int timestamp,
+ int flags,
+ int result,
+ unsigned int type),
- TP_ARGS(device, cmd, ibdesc, result),
+ TP_ARGS(device, drawctxt_id, ibdesc, numibs, timestamp, flags,
+ result, type),
TP_STRUCT__entry(
__string(device_name, device->name)
@@ -48,21 +55,23 @@
__field(unsigned int, timestamp)
__field(unsigned int, flags)
__field(int, result)
+ __field(unsigned int, drawctxt_type)
),
TP_fast_assign(
__assign_str(device_name, device->name);
- __entry->drawctxt_id = cmd->drawctxt_id;
+ __entry->drawctxt_id = drawctxt_id;
__entry->ibdesc_addr = ibdesc[0].gpuaddr;
- __entry->numibs = cmd->numibs;
- __entry->timestamp = cmd->timestamp;
- __entry->flags = cmd->flags;
+ __entry->numibs = numibs;
+ __entry->timestamp = timestamp;
+ __entry->flags = flags;
__entry->result = result;
+ __entry->drawctxt_type = type;
),
TP_printk(
"d_name=%s ctx=%u ib=0x%u numibs=%u timestamp=0x%x "
- "flags=0x%x(%s) result=%d",
+ "flags=0x%x(%s) result=%d type=%s",
__get_str(device_name),
__entry->drawctxt_id,
__entry->ibdesc_addr,
@@ -74,7 +83,9 @@
{ KGSL_CONTEXT_SUBMIT_IB_LIST, "IB_LIST" },
{ KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" })
: "None",
- __entry->result
+ __entry->result,
+ __print_symbolic(__entry->drawctxt_type,
+ ADRENO_DRAWCTXT_TYPES)
)
);
@@ -274,6 +285,32 @@
)
);
+TRACE_EVENT(kgsl_gpubusy,
+ TP_PROTO(struct kgsl_device *device, unsigned int busy,
+ unsigned int elapsed),
+
+ TP_ARGS(device, busy, elapsed),
+
+ TP_STRUCT__entry(
+ __string(device_name, device->name)
+ __field(unsigned int, busy)
+ __field(unsigned int, elapsed)
+ ),
+
+ TP_fast_assign(
+ __assign_str(device_name, device->name);
+ __entry->busy = busy;
+ __entry->elapsed = elapsed;
+ ),
+
+ TP_printk(
+ "d_name=%s busy=%d elapsed=%d",
+ __get_str(device_name),
+ __entry->busy,
+ __entry->elapsed
+ )
+);
+
DECLARE_EVENT_CLASS(kgsl_pwrstate_template,
TP_PROTO(struct kgsl_device *device, unsigned int state),
@@ -317,6 +354,8 @@
__field(unsigned int, size)
__field(unsigned int, tgid)
__array(char, usage, 16)
+ __field(unsigned int, id)
+ __field(unsigned int, flags)
),
TP_fast_assign(
@@ -325,12 +364,76 @@
__entry->tgid = mem_entry->priv->pid;
kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
mem_entry->memdesc.flags);
+ __entry->id = mem_entry->id;
+ __entry->flags = mem_entry->memdesc.flags;
),
TP_printk(
- "gpuaddr=0x%08x size=%d tgid=%d usage=%s",
+ "gpuaddr=0x%08x size=%d tgid=%d usage=%s id=%d flags=0x%08x",
__entry->gpuaddr, __entry->size, __entry->tgid,
- __entry->usage
+ __entry->usage, __entry->id, __entry->flags
+ )
+);
+
+TRACE_EVENT(kgsl_mem_mmap,
+
+ TP_PROTO(struct kgsl_mem_entry *mem_entry),
+
+ TP_ARGS(mem_entry),
+
+ TP_STRUCT__entry(
+ __field(unsigned long, useraddr)
+ __field(unsigned int, gpuaddr)
+ __field(unsigned int, size)
+ __array(char, usage, 16)
+ __field(unsigned int, id)
+ __field(unsigned int, flags)
+ ),
+
+ TP_fast_assign(
+ __entry->useraddr = mem_entry->memdesc.useraddr;
+ __entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+ __entry->size = mem_entry->memdesc.size;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
+ __entry->id = mem_entry->id;
+ __entry->flags = mem_entry->memdesc.flags;
+ ),
+
+ TP_printk(
+ "useraddr=%lx gpuaddr=0x%08x size=%d usage=%s id=%d"
+ " flags=0x%08x",
+ __entry->useraddr, __entry->gpuaddr, __entry->size,
+ __entry->usage, __entry->id, __entry->flags
+ )
+);
+
+TRACE_EVENT(kgsl_mem_unmapped_area_collision,
+
+ TP_PROTO(struct kgsl_mem_entry *mem_entry,
+ unsigned long hint,
+ unsigned long len,
+ unsigned long addr),
+
+ TP_ARGS(mem_entry, hint, len, addr),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(unsigned long, hint)
+ __field(unsigned long, len)
+ __field(unsigned long, addr)
+ ),
+
+ TP_fast_assign(
+ __entry->id = mem_entry->id;
+ __entry->hint = hint;
+ __entry->len = len;
+ __entry->addr = addr;
+ ),
+
+ TP_printk(
+ "id=%d hint=0x%lx len=%ld addr=0x%lx",
+ __entry->id, __entry->hint, __entry->len, __entry->addr
)
);
@@ -347,6 +450,7 @@
__field(int, type)
__field(unsigned int, tgid)
__array(char, usage, 16)
+ __field(unsigned int, id)
),
TP_fast_assign(
@@ -357,13 +461,14 @@
__entry->tgid = mem_entry->priv->pid;
kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
mem_entry->memdesc.flags);
+ __entry->id = mem_entry->id;
),
TP_printk(
- "gpuaddr=0x%08x size=%d type=%d fd=%d tgid=%d usage %s",
+ "gpuaddr=0x%08x size=%d type=%d fd=%d tgid=%d usage=%s id=%d",
__entry->gpuaddr, __entry->size,
__entry->type, __entry->fd, __entry->tgid,
- __entry->usage
+ __entry->usage, __entry->id
)
);
@@ -380,6 +485,7 @@
__field(int, fd)
__field(unsigned int, tgid)
__array(char, usage, 16)
+ __field(unsigned int, id)
),
TP_fast_assign(
@@ -389,12 +495,47 @@
__entry->tgid = mem_entry->priv->pid;
kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
mem_entry->memdesc.flags);
+ __entry->id = mem_entry->id;
),
TP_printk(
- "gpuaddr=0x%08x size=%d type=%d tgid=%d usage=%s",
+ "gpuaddr=0x%08x size=%d type=%d tgid=%d usage=%s id=%d",
__entry->gpuaddr, __entry->size, __entry->type,
- __entry->tgid, __entry->usage
+ __entry->tgid, __entry->usage, __entry->id
+ )
+);
+
+TRACE_EVENT(kgsl_mem_sync_cache,
+
+ TP_PROTO(struct kgsl_mem_entry *mem_entry, unsigned int op),
+
+ TP_ARGS(mem_entry, op),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, gpuaddr)
+ __field(unsigned int, size)
+ __array(char, usage, 16)
+ __field(unsigned int, tgid)
+ __field(unsigned int, id)
+ __field(unsigned int, op)
+ ),
+
+ TP_fast_assign(
+ __entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+ __entry->size = mem_entry->memdesc.size;
+ __entry->tgid = mem_entry->priv->pid;
+ __entry->id = mem_entry->id;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
+ __entry->op = op;
+ ),
+
+ TP_printk(
+ "gpuaddr=0x%08x size=%d tgid=%d usage=%s id=%d op=%c%c",
+ __entry->gpuaddr, __entry->size, __entry->tgid, __entry->usage,
+ __entry->id,
+ (__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.',
+ (__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.'
)
);
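
[Editor's note: the op field is decoded into two flag characters, so a clean+invalidate prints as op=ci, a clean-only as op=c., and an invalidate-only as op=.i. A minimal sketch of a call site follows; the entry variable is hypothetical.]

/* Illustrative sketch only: trace a clean+invalidate on a buffer. */
trace_kgsl_mem_sync_cache(entry,
		KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV);
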
@@ -411,6 +552,7 @@
__field(unsigned int, size)
__field(int, type)
__array(char, usage, 16)
+ __field(unsigned int, id)
__field(unsigned int, drawctxt_id)
__field(unsigned int, curr_ts)
__field(unsigned int, free_ts)
@@ -422,6 +564,7 @@
__entry->size = mem_entry->memdesc.size;
kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
mem_entry->memdesc.flags);
+ __entry->id = mem_entry->id;
__entry->drawctxt_id = id;
__entry->type = mem_entry->memtype;
__entry->curr_ts = curr_ts;
@@ -429,13 +572,14 @@
),
TP_printk(
- "d_name=%s gpuaddr=0x%08x size=%d type=%d usage=%s ctx=%u"
+ "d_name=%s gpuaddr=0x%08x size=%d type=%d usage=%s id=%d ctx=%u"
" curr_ts=0x%x free_ts=0x%x",
__get_str(device_name),
__entry->gpuaddr,
__entry->size,
__entry->type,
__entry->usage,
+ __entry->id,
__entry->drawctxt_id,
__entry->curr_ts,
__entry->free_ts
@@ -535,6 +679,31 @@
)
);
+TRACE_EVENT(kgsl_regwrite,
+
+ TP_PROTO(struct kgsl_device *device, unsigned int offset,
+ unsigned int value),
+
+ TP_ARGS(device, offset, value),
+
+ TP_STRUCT__entry(
+ __string(device_name, device->name)
+ __field(unsigned int, offset)
+ __field(unsigned int, value)
+ ),
+
+ TP_fast_assign(
+ __assign_str(device_name, device->name);
+ __entry->offset = offset;
+ __entry->value = value;
+ ),
+
+ TP_printk(
+ "d_name=%s reg=%x value=%x",
+ __get_str(device_name), __entry->offset, __entry->value
+ )
+);
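
[Editor's note: a hedged sketch of how a register-write helper might pair the MMIO access with this tracepoint; the reg_virt field and the helper's name are assumptions, not confirmed by this patch.]

/* Illustrative sketch only: trace each register write next to the MMIO. */
static void example_regwrite(struct kgsl_device *device,
			     unsigned int offsetwords, unsigned int value)
{
	trace_kgsl_regwrite(device, offsetwords, value);
	__raw_writel(value, device->reg_virt + (offsetwords << 2));
}
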
+
TRACE_EVENT(kgsl_register_event,
TP_PROTO(unsigned int id, unsigned int timestamp),
TP_ARGS(id, timestamp),
diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c
index 484630d..a07959b 100644
--- a/drivers/gpu/msm/z180.c
+++ b/drivers/gpu/msm/z180.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -17,6 +17,7 @@
#include "kgsl.h"
#include "kgsl_cffdump.h"
#include "kgsl_sharedmem.h"
+#include "kgsl_trace.h"
#include "z180.h"
#include "z180_reg.h"
@@ -93,7 +94,8 @@
#define Z180_CMDWINDOW_TARGET_SHIFT 0
#define Z180_CMDWINDOW_ADDR_SHIFT 8
-static int z180_start(struct kgsl_device *device, unsigned int init_ram);
+static int z180_init(struct kgsl_device *device);
+static int z180_start(struct kgsl_device *device);
static int z180_stop(struct kgsl_device *device);
static int z180_wait(struct kgsl_device *device,
struct kgsl_context *context,
@@ -212,10 +214,6 @@
queue_work(device->work_queue, &device->ts_expired_ws);
wake_up_interruptible(&device->wait_queue);
-
- atomic_notifier_call_chain(
- &(device->ts_notifier_list),
- device->id, NULL);
}
}
@@ -248,22 +246,26 @@
int result = 0;
struct z180_device *z180_dev = Z180_DEVICE(device);
- result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ result = kgsl_mmu_map_global(pagetable, &device->mmu.setstate_memory);
if (result)
goto error;
- result = kgsl_mmu_map_global(pagetable, &device->memstore,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ result = kgsl_mmu_map_global(pagetable, &device->memstore);
if (result)
goto error_unmap_dummy;
result = kgsl_mmu_map_global(pagetable,
- &z180_dev->ringbuffer.cmdbufdesc,
- GSL_PT_PAGE_RV);
+ &z180_dev->ringbuffer.cmdbufdesc);
if (result)
goto error_unmap_memstore;
+ /*
+ * Set the mpu end to the last "normal" global memory we use.
+ * For the IOMMU, this will be used to restrict access to the
+ * mapped registers.
+ */
+ device->mh.mpu_range = z180_dev->ringbuffer.cmdbufdesc.gpuaddr +
+ z180_dev->ringbuffer.cmdbufdesc.size;
return result;
error_unmap_dummy:
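
[Editor's note: to make the mpu_range arithmetic concrete, a worked example with hypothetical addresses: if cmdbufdesc.gpuaddr were 0x66000000 and cmdbufdesc.size were 128 KB, mpu_range would be set to 0x66020000; any global mapping placed above that mark, such as the mapped registers mentioned in the comment, would then sit outside the MPU window.]
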
@@ -319,16 +321,11 @@
*p++ = ADDR_VGV3_LAST << 24;
}
-static void z180_cmdstream_start(struct kgsl_device *device, int init_ram)
+static void z180_cmdstream_start(struct kgsl_device *device)
{
struct z180_device *z180_dev = Z180_DEVICE(device);
unsigned int cmd = VGV3_NEXTCMD_JUMP << VGV3_NEXTCMD_NEXTCMD_FSHIFT;
- if (init_ram) {
- z180_dev->timestamp = 0;
- z180_dev->current_timestamp = 0;
- }
-
addmarker(&z180_dev->ringbuffer, 0);
z180_cmdwindow_write(device, ADDR_VGV3_MODE, 4);
@@ -487,6 +484,10 @@
z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd);
z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0);
error:
+
+ trace_kgsl_issueibcmds(device, context->id, ibdesc, numibs,
+ *timestamp, ctrl, result, 0);
+
return (int)result;
}
@@ -495,6 +496,7 @@
struct z180_device *z180_dev = Z180_DEVICE(device);
memset(&z180_dev->ringbuffer, 0, sizeof(struct z180_ringbuffer));
z180_dev->ringbuffer.prevctx = Z180_INVALID_CONTEXT;
+ z180_dev->ringbuffer.cmdbufdesc.flags = KGSL_MEMFLAGS_GPUREADONLY;
return kgsl_allocate_contiguous(&z180_dev->ringbuffer.cmdbufdesc,
Z180_RB_SIZE);
}
@@ -551,7 +553,17 @@
return 0;
}
-static int z180_start(struct kgsl_device *device, unsigned int init_ram)
+static int z180_init(struct kgsl_device *device)
+{
+ struct z180_device *z180_dev = Z180_DEVICE(device);
+
+ z180_dev->timestamp = 0;
+ z180_dev->current_timestamp = 0;
+
+ return 0;
+}
+
+static int z180_start(struct kgsl_device *device)
{
int status = 0;
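
[Editor's note: the init/start split above means the timestamp counters are now reset once, at device initialization, rather than on every power-up as the old init_ram flag allowed. A hedged sketch of the expected calling sequence from the kgsl core; this ordering is an assumption, not shown in this patch.]

/* Illustrative sketch only: expected ordering from the kgsl core. */
if (device->ftbl->init)
	status = device->ftbl->init(device);	/* once, at first open */
if (!status)
	status = device->ftbl->start(device);	/* on every power-up */
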
@@ -568,11 +580,14 @@
if (status)
goto error_clk_off;
- z180_cmdstream_start(device, init_ram);
+ z180_cmdstream_start(device);
mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
device->ftbl->irqctrl(device, 1);
+
+ device->reset_counter++;
+
return 0;
error_clk_off:
@@ -811,9 +826,9 @@
{
int status = -EINVAL;
- /* Don't wait forever, set a max (10 sec) value for now */
+ /* Don't wait forever, set a max of Z180_IDLE_TIMEOUT */
if (msecs == -1)
- msecs = 10 * MSEC_PER_SEC;
+ msecs = Z180_IDLE_TIMEOUT;
mutex_unlock(&device->mutex);
status = z180_wait(device, context, timestamp, msecs);
@@ -915,6 +930,7 @@
.idle = z180_idle,
.isidle = z180_isidle,
.suspend_context = z180_suspend_context,
+ .init = z180_init,
.start = z180_start,
.stop = z180_stop,
.getproperty = z180_getproperty,
diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h
index 268aac3..1be0870 100644
--- a/drivers/gpu/msm/z180.h
+++ b/drivers/gpu/msm/z180.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -29,7 +29,7 @@
#define Z180_DEFAULT_PWRSCALE_POLICY NULL
-/* Wait a maximum of 10 seconds when trying to idle the core */
-#define Z180_IDLE_TIMEOUT (10 * 1000)
+/* Wait a maximum of 20 seconds when trying to idle the core */
+#define Z180_IDLE_TIMEOUT (20 * 1000)
struct z180_ringbuffer {
unsigned int prevctx;