Copied CAF 2.5.1 video/gpu genlock and rotator changes [WIP]
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index 6e38325..5a18b0d 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -574,7 +574,7 @@
drm_monitor_supports_rb(struct edid *edid)
{
if (edid->revision >= 4) {
- bool ret = false;
+ bool ret;
drm_for_each_detailed_block((u8 *)edid, is_rb, &ret);
return ret;
}
diff --git a/drivers/gpu/drm/drm_platform.c b/drivers/gpu/drm/drm_platform.c
index 82431dc..fa67ca2 100644
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@@ -3,7 +3,7 @@
*
* Copyright 2003 José Fonseca.
* Copyright 2003 Leif Delgass.
- * Copyright (c) 2009, Code Aurora Forum.
+ * Copyright (c) 2009, The Linux Foundation.
* All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a
diff --git a/drivers/gpu/drm/gma500/psb_device.c b/drivers/gpu/drm/gma500/psb_device.c
index 328a193..95d163e 100644
--- a/drivers/gpu/drm/gma500/psb_device.c
+++ b/drivers/gpu/drm/gma500/psb_device.c
@@ -197,8 +197,7 @@
}
list_for_each_entry(connector, &dev->mode_config.connector_list, head)
- if (connector->funcs->save)
- connector->funcs->save(connector);
+ connector->funcs->save(connector);
mutex_unlock(&dev->mode_config.mutex);
return 0;
@@ -236,8 +235,7 @@
crtc->funcs->restore(crtc);
list_for_each_entry(connector, &dev->mode_config.connector_list, head)
- if (connector->funcs->restore)
- connector->funcs->restore(connector);
+ connector->funcs->restore(connector);
mutex_unlock(&dev->mode_config.mutex);
return 0;
diff --git a/drivers/gpu/drm/gma500/psb_drv.c b/drivers/gpu/drm/gma500/psb_drv.c
index 09af2ff..c34adf9 100644
--- a/drivers/gpu/drm/gma500/psb_drv.c
+++ b/drivers/gpu/drm/gma500/psb_drv.c
@@ -349,7 +349,7 @@
PSB_WSGX32(0x30000000, PSB_CR_BIF_3D_REQ_BASE);
/* igd_opregion_init(&dev_priv->opregion_dev); */
-/* acpi_video_register(); */
+ acpi_video_register();
if (dev_priv->lid_state)
psb_lid_timer_init(dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 26c67a7..afd4e03 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -424,35 +424,14 @@
mutex_unlock(&dev_priv->dev->struct_mutex);
}
-static void gen6_queue_rps_work(struct drm_i915_private *dev_priv,
- u32 pm_iir)
-{
- unsigned long flags;
-
- /*
- * IIR bits should never already be set because IMR should
- * prevent an interrupt from being shown in IIR. The warning
- * displays a case where we've unsafely cleared
- * dev_priv->pm_iir. Although missing an interrupt of the same
- * type is not a problem, it displays a problem in the logic.
- *
- * The mask bit in IMR is cleared by rps_work.
- */
-
- spin_lock_irqsave(&dev_priv->rps_lock, flags);
- dev_priv->pm_iir |= pm_iir;
- I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir);
- POSTING_READ(GEN6_PMIMR);
- spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
-
- queue_work(dev_priv->wq, &dev_priv->rps_work);
-}
-
-static void pch_irq_handler(struct drm_device *dev, u32 pch_iir)
+static void pch_irq_handler(struct drm_device *dev)
{
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+ u32 pch_iir;
int pipe;
+ pch_iir = I915_READ(SDEIIR);
+
if (pch_iir & SDE_AUDIO_POWER_MASK)
DRM_DEBUG_DRIVER("PCH audio power change on port %d\n",
(pch_iir & SDE_AUDIO_POWER_MASK) >>
@@ -550,11 +529,19 @@
if (de_iir & DE_PCH_EVENT_IVB) {
if (pch_iir & SDE_HOTPLUG_MASK_CPT)
queue_work(dev_priv->wq, &dev_priv->hotplug_work);
- pch_irq_handler(dev, pch_iir);
+ pch_irq_handler(dev);
}
- if (pm_iir & GEN6_PM_DEFERRED_EVENTS)
- gen6_queue_rps_work(dev_priv, pm_iir);
+ if (pm_iir & GEN6_PM_DEFERRED_EVENTS) {
+ unsigned long flags;
+ spin_lock_irqsave(&dev_priv->rps_lock, flags);
+ WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
+ dev_priv->pm_iir |= pm_iir;
+ I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir);
+ POSTING_READ(GEN6_PMIMR);
+ spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
+ queue_work(dev_priv->wq, &dev_priv->rps_work);
+ }
/* should clear PCH hotplug event before clear CPU irq */
I915_WRITE(SDEIIR, pch_iir);
@@ -642,7 +629,7 @@
if (de_iir & DE_PCH_EVENT) {
if (pch_iir & hotplug_mask)
queue_work(dev_priv->wq, &dev_priv->hotplug_work);
- pch_irq_handler(dev, pch_iir);
+ pch_irq_handler(dev);
}
if (de_iir & DE_PCU_EVENT) {
@@ -650,8 +637,25 @@
i915_handle_rps_change(dev);
}
- if (IS_GEN6(dev) && pm_iir & GEN6_PM_DEFERRED_EVENTS)
- gen6_queue_rps_work(dev_priv, pm_iir);
+ if (IS_GEN6(dev) && pm_iir & GEN6_PM_DEFERRED_EVENTS) {
+ /*
+ * IIR bits should never already be set because IMR should
+ * prevent an interrupt from being shown in IIR. The warning
+ * displays a case where we've unsafely cleared
+ * dev_priv->pm_iir. Although missing an interrupt of the same
+ * type is not a problem, it displays a problem in the logic.
+ *
+ * The mask bit in IMR is cleared by rps_work.
+ */
+ unsigned long flags;
+ spin_lock_irqsave(&dev_priv->rps_lock, flags);
+ WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
+ dev_priv->pm_iir |= pm_iir;
+ I915_WRITE(GEN6_PMIMR, dev_priv->pm_iir);
+ POSTING_READ(GEN6_PMIMR);
+ spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
+ queue_work(dev_priv->wq, &dev_priv->rps_work);
+ }
/* should clear PCH hotplug event before clear CPU irq */
I915_WRITE(SDEIIR, pch_iir);
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 29bfd89..b114875 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -615,21 +615,6 @@
#define GEN6_BSD_RNCID 0x12198
-#define GEN7_FF_THREAD_MODE 0x20a0
-#define GEN7_FF_SCHED_MASK 0x0077070
-#define GEN7_FF_TS_SCHED_HS1 (0x5<<16)
-#define GEN7_FF_TS_SCHED_HS0 (0x3<<16)
-#define GEN7_FF_TS_SCHED_LOAD_BALANCE (0x1<<16)
-#define GEN7_FF_TS_SCHED_HW (0x0<<16) /* Default */
-#define GEN7_FF_VS_SCHED_HS1 (0x5<<12)
-#define GEN7_FF_VS_SCHED_HS0 (0x3<<12)
-#define GEN7_FF_VS_SCHED_LOAD_BALANCE (0x1<<12) /* Default */
-#define GEN7_FF_VS_SCHED_HW (0x0<<12)
-#define GEN7_FF_DS_SCHED_HS1 (0x5<<4)
-#define GEN7_FF_DS_SCHED_HS0 (0x3<<4)
-#define GEN7_FF_DS_SCHED_LOAD_BALANCE (0x1<<4) /* Default */
-#define GEN7_FF_DS_SCHED_HW (0x0<<4)
-
/*
* Framebuffer compression (915+ only)
*/
@@ -3752,6 +3737,10 @@
# define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE (1 << 12)
# define GEN6_RCCUNIT_CLOCK_GATE_DISABLE (1 << 11)
+#define GEN6_UCGCTL2 0x9404
+# define GEN6_RCPBUNIT_CLOCK_GATE_DISABLE (1 << 12)
+# define GEN6_RCCUNIT_CLOCK_GATE_DISABLE (1 << 11)
+
#define GEN6_RPNSWREQ 0xA008
#define GEN6_TURBO_DISABLE (1<<31)
#define GEN6_FREQUENCY(x) ((x)<<25)
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index 0d13778..2b5eb22 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -740,11 +740,8 @@
if (HAS_PCH_SPLIT(dev)) {
I915_WRITE(BLC_PWM_PCH_CTL1, dev_priv->saveBLC_PWM_CTL);
I915_WRITE(BLC_PWM_PCH_CTL2, dev_priv->saveBLC_PWM_CTL2);
- /* NOTE: BLC_PWM_CPU_CTL must be written after BLC_PWM_CPU_CTL2;
- * otherwise we get blank eDP screen after S3 on some machines
- */
- I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2);
I915_WRITE(BLC_PWM_CPU_CTL, dev_priv->saveBLC_CPU_PWM_CTL);
+ I915_WRITE(BLC_PWM_CPU_CTL2, dev_priv->saveBLC_CPU_PWM_CTL2);
I915_WRITE(PCH_PP_ON_DELAYS, dev_priv->savePP_ON_DELAYS);
I915_WRITE(PCH_PP_OFF_DELAYS, dev_priv->savePP_OFF_DELAYS);
I915_WRITE(PCH_PP_DIVISOR, dev_priv->savePP_DIVISOR);
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 3de3d9b..1b1cf3b 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -4982,6 +4982,17 @@
continue;
}
+ if (intel_encoder->type == INTEL_OUTPUT_EDP) {
+ /* Use VBT settings if we have an eDP panel */
+ unsigned int edp_bpc = dev_priv->edp.bpp / 3;
+
+ if (edp_bpc < display_bpc) {
+ DRM_DEBUG_KMS("clamping display bpc (was %d) to eDP (%d)\n", display_bpc, edp_bpc);
+ display_bpc = edp_bpc;
+ }
+ continue;
+ }
+
/* Not one of the known troublemakers, check the EDID */
list_for_each_entry(connector, &dev->mode_config.connector_list,
head) {
@@ -7606,11 +7617,10 @@
{
struct drm_i915_private *dev_priv = dev->dev_private;
u32 reg, val;
- int i;
/* Clear any frame start delays used for debugging left by the BIOS */
- for_each_pipe(i) {
- reg = PIPECONF(i);
+ for_each_pipe(pipe) {
+ reg = PIPECONF(pipe);
I915_WRITE(reg, I915_READ(reg) & ~PIPECONF_FRAME_START_DELAY_MASK);
}
@@ -8357,7 +8367,7 @@
I915_WRITE(GEN6_RP_IDLE_HYSTERSIS, 10);
I915_WRITE(GEN6_RP_CONTROL,
GEN6_RP_MEDIA_TURBO |
- GEN6_RP_MEDIA_HW_NORMAL_MODE |
+ GEN6_RP_MEDIA_HW_MODE |
GEN6_RP_MEDIA_IS_GFX |
GEN6_RP_ENABLE |
GEN6_RP_UP_BUSY_AVG |
@@ -8602,18 +8612,6 @@
}
}
-static void gen7_setup_fixed_func_scheduler(struct drm_i915_private *dev_priv)
-{
- uint32_t reg = I915_READ(GEN7_FF_THREAD_MODE);
-
- reg &= ~GEN7_FF_SCHED_MASK;
- reg |= GEN7_FF_TS_SCHED_HW;
- reg |= GEN7_FF_VS_SCHED_HW;
- reg |= GEN7_FF_DS_SCHED_HW;
-
- I915_WRITE(GEN7_FF_THREAD_MODE, reg);
-}
-
static void ivybridge_init_clock_gating(struct drm_device *dev)
{
struct drm_i915_private *dev_priv = dev->dev_private;
@@ -8658,8 +8656,6 @@
DISPPLANE_TRICKLE_FEED_DISABLE);
intel_flush_display_plane(dev_priv, pipe);
}
-
- gen7_setup_fixed_func_scheduler(dev_priv);
}
static void g4x_init_clock_gating(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 069725c..4b63791 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -368,7 +368,7 @@
int recv_bytes;
uint32_t status;
uint32_t aux_clock_divider;
- int try, precharge;
+ int try, precharge = 5;
intel_dp_check_edp(intel_dp);
/* The clock divider is based off the hrawclk,
@@ -388,11 +388,6 @@
else
aux_clock_divider = intel_hrawclk(dev) / 2;
- if (IS_GEN6(dev))
- precharge = 3;
- else
- precharge = 5;
-
/* Try to wait for any previous AUX channel activity */
for (try = 0; try < 3; try++) {
status = I915_READ(ch_ctl);
@@ -712,8 +707,8 @@
bpp = adjusted_mode->private_flags & INTEL_MODE_DP_FORCE_6BPC ? 18 : 24;
- for (clock = 0; clock <= max_clock; clock++) {
- for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) {
+ for (lane_count = 1; lane_count <= max_lane_count; lane_count <<= 1) {
+ for (clock = 0; clock <= max_clock; clock++) {
int link_avail = intel_dp_max_data_rate(intel_dp_link_clock(bws[clock]), lane_count);
if (intel_dp_link_required(mode->clock, bpp)
@@ -1153,17 +1148,13 @@
DRM_DEBUG_KMS("Turn eDP power off\n");
- WARN(!intel_dp->want_panel_vdd, "Need VDD to turn off panel\n");
+ WARN(intel_dp->want_panel_vdd, "Cannot turn power off while VDD is on\n");
pp = ironlake_get_pp_control(dev_priv);
- /* We need to switch off panel power _and_ force vdd, for otherwise some
- * panels get very unhappy and cease to work. */
pp &= ~(POWER_TARGET_ON | EDP_FORCE_VDD | PANEL_POWER_RESET | EDP_BLC_ENABLE);
I915_WRITE(PCH_PP_CONTROL, pp);
POSTING_READ(PCH_PP_CONTROL);
- intel_dp->want_panel_vdd = false;
-
ironlake_wait_panel_off(intel_dp);
}
@@ -1268,14 +1259,18 @@
{
struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
-
- /* Make sure the panel is off before trying to change the mode. But also
- * ensure that we have vdd while we switch off the panel. */
- ironlake_edp_panel_vdd_on(intel_dp);
ironlake_edp_backlight_off(intel_dp);
- intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
ironlake_edp_panel_off(intel_dp);
+
+ /* Wake up the sink first */
+ ironlake_edp_panel_vdd_on(intel_dp);
+ intel_dp_sink_dpms(intel_dp, DRM_MODE_DPMS_ON);
intel_dp_link_down(intel_dp);
+ ironlake_edp_panel_vdd_off(intel_dp, false);
+
+ /* Make sure the panel is off before trying to
+ * change the mode
+ */
}
static void intel_dp_commit(struct drm_encoder *encoder)
@@ -1307,12 +1302,13 @@
uint32_t dp_reg = I915_READ(intel_dp->output_reg);
if (mode != DRM_MODE_DPMS_ON) {
- /* Switching the panel off requires vdd. */
- ironlake_edp_panel_vdd_on(intel_dp);
ironlake_edp_backlight_off(intel_dp);
- intel_dp_sink_dpms(intel_dp, mode);
ironlake_edp_panel_off(intel_dp);
+
+ ironlake_edp_panel_vdd_on(intel_dp);
+ intel_dp_sink_dpms(intel_dp, mode);
intel_dp_link_down(intel_dp);
+ ironlake_edp_panel_vdd_off(intel_dp, false);
if (is_cpu_edp(intel_dp))
ironlake_edp_pll_off(encoder);
diff --git a/drivers/gpu/drm/i915/intel_lvds.c b/drivers/gpu/drm/i915/intel_lvds.c
index 9fadd64..9c71183 100644
--- a/drivers/gpu/drm/i915/intel_lvds.c
+++ b/drivers/gpu/drm/i915/intel_lvds.c
@@ -747,14 +747,6 @@
},
{
.callback = intel_no_lvds_dmi_callback,
- .ident = "Hewlett-Packard HP t5740e Thin Client",
- .matches = {
- DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
- DMI_MATCH(DMI_PRODUCT_NAME, "HP t5740e Thin Client"),
- },
- },
- {
- .callback = intel_no_lvds_dmi_callback,
.ident = "Hewlett-Packard t5745",
.matches = {
DMI_MATCH(DMI_BOARD_VENDOR, "Hewlett-Packard"),
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 12a9e5f..62892a8 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -258,6 +258,8 @@
I915_WRITE_HEAD(ring, 0);
ring->write_tail(ring, 0);
+ /* Initialize the ring. */
+ I915_WRITE_START(ring, obj->gtt_offset);
head = I915_READ_HEAD(ring) & HEAD_ADDR;
/* G45 ring initialization fails to reset head to zero */
@@ -283,11 +285,6 @@
}
}
- /* Initialize the ring. This must happen _after_ we've cleared the ring
- * registers with the above sequence (the readback of the HEAD registers
- * also enforces ordering), otherwise the hw might lose the new ring
- * register values. */
- I915_WRITE_START(ring, obj->gtt_offset);
I915_WRITE_CTL(ring,
((ring->size - PAGE_SIZE) & RING_NR_PAGES)
| RING_VALID);
@@ -312,7 +309,6 @@
ring->head = I915_READ_HEAD(ring);
ring->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
ring->space = ring_space(ring);
- ring->last_retired_head = -1;
}
return 0;
@@ -1030,10 +1026,6 @@
if (ret)
goto err_unref;
- ret = i915_gem_object_set_to_gtt_domain(obj, true);
- if (ret)
- goto err_unpin;
-
ring->map.size = ring->size;
ring->map.offset = dev->agp->base + obj->gtt_offset;
ring->map.type = 0;
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index eea58c6..ae5e748 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -769,12 +769,10 @@
((v_sync_len & 0x30) >> 4);
dtd->part2.dtd_flags = 0x18;
- if (mode->flags & DRM_MODE_FLAG_INTERLACE)
- dtd->part2.dtd_flags |= DTD_FLAG_INTERLACE;
if (mode->flags & DRM_MODE_FLAG_PHSYNC)
- dtd->part2.dtd_flags |= DTD_FLAG_HSYNC_POSITIVE;
+ dtd->part2.dtd_flags |= 0x2;
if (mode->flags & DRM_MODE_FLAG_PVSYNC)
- dtd->part2.dtd_flags |= DTD_FLAG_VSYNC_POSITIVE;
+ dtd->part2.dtd_flags |= 0x4;
dtd->part2.sdvo_flags = 0;
dtd->part2.v_sync_off_high = v_sync_offset & 0xc0;
@@ -808,11 +806,9 @@
mode->clock = dtd->part1.clock * 10;
mode->flags &= ~(DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC);
- if (dtd->part2.dtd_flags & DTD_FLAG_INTERLACE)
- mode->flags |= DRM_MODE_FLAG_INTERLACE;
- if (dtd->part2.dtd_flags & DTD_FLAG_HSYNC_POSITIVE)
+ if (dtd->part2.dtd_flags & 0x2)
mode->flags |= DRM_MODE_FLAG_PHSYNC;
- if (dtd->part2.dtd_flags & DTD_FLAG_VSYNC_POSITIVE)
+ if (dtd->part2.dtd_flags & 0x4)
mode->flags |= DRM_MODE_FLAG_PVSYNC;
}
diff --git a/drivers/gpu/drm/i915/intel_sdvo_regs.h b/drivers/gpu/drm/i915/intel_sdvo_regs.h
index 9d03014..6b7b22f 100644
--- a/drivers/gpu/drm/i915/intel_sdvo_regs.h
+++ b/drivers/gpu/drm/i915/intel_sdvo_regs.h
@@ -61,11 +61,6 @@
u16 output_flags;
} __attribute__((packed));
-/* Note: SDVO detailed timing flags match EDID misc flags. */
-#define DTD_FLAG_HSYNC_POSITIVE (1 << 1)
-#define DTD_FLAG_VSYNC_POSITIVE (1 << 2)
-#define DTD_FLAG_INTERLACE (1 << 7)
-
/** This matches the EDID DTD structure, more or less */
struct intel_sdvo_dtd {
struct {
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index c82b1d4..05f765e 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -674,54 +674,6 @@
.filter_table = filter_table,
},
{
- .name = "480p",
- .clock = 107520,
- .refresh = 59940,
- .oversample = TV_OVERSAMPLE_4X,
- .component_only = 1,
-
- .hsync_end = 64, .hblank_end = 122,
- .hblank_start = 842, .htotal = 857,
-
- .progressive = true, .trilevel_sync = false,
-
- .vsync_start_f1 = 12, .vsync_start_f2 = 12,
- .vsync_len = 12,
-
- .veq_ena = false,
-
- .vi_end_f1 = 44, .vi_end_f2 = 44,
- .nbr_end = 479,
-
- .burst_ena = false,
-
- .filter_table = filter_table,
- },
- {
- .name = "576p",
- .clock = 107520,
- .refresh = 50000,
- .oversample = TV_OVERSAMPLE_4X,
- .component_only = 1,
-
- .hsync_end = 64, .hblank_end = 139,
- .hblank_start = 859, .htotal = 863,
-
- .progressive = true, .trilevel_sync = false,
-
- .vsync_start_f1 = 10, .vsync_start_f2 = 10,
- .vsync_len = 10,
-
- .veq_ena = false,
-
- .vi_end_f1 = 48, .vi_end_f2 = 48,
- .nbr_end = 575,
-
- .burst_ena = false,
-
- .filter_table = filter_table,
- },
- {
.name = "720p@60Hz",
.clock = 148800,
.refresh = 60000,
@@ -1233,11 +1185,6 @@
I915_WRITE(TV_DAC, save_tv_dac & ~TVDAC_STATE_CHG_EN);
I915_WRITE(TV_CTL, save_tv_ctl);
- POSTING_READ(TV_CTL);
-
- /* For unknown reasons the hw barfs if we don't do this vblank wait. */
- intel_wait_for_vblank(intel_tv->base.base.dev,
- to_intel_crtc(intel_tv->base.base.crtc)->pipe);
/* Restore interrupt config */
if (connector->polled & DRM_CONNECTOR_POLL_HPD) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 12ce044..7d15a77 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1030,7 +1030,7 @@
nvbo->placement.fpfn = 0;
nvbo->placement.lpfn = dev_priv->fb_mappable_pages;
- nouveau_bo_placement_set(nvbo, TTM_PL_FLAG_VRAM, 0);
+ nouveau_bo_placement_set(nvbo, TTM_PL_VRAM, 0);
return nouveau_bo_validate(nvbo, false, true, false);
}
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 7b11edb..fa86035 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -654,13 +654,7 @@
if (nv_connector->edid && connector->display_info.bpc)
return;
- /* EDID 1.4 is *supposed* to be supported on eDP, but, Apple... */
- if (nv_connector->type == DCB_CONNECTOR_eDP) {
- connector->display_info.bpc = 6;
- return;
- }
-
- /* we're out of options unless we're LVDS, default to 8bpc */
+ /* if not, we're out of options unless we're LVDS, default to 8bpc */
if (nv_encoder->dcb->type != OUTPUT_LVDS) {
connector->display_info.bpc = 8;
return;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 6fd2211..8113e92 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -497,7 +497,7 @@
nfbdev->helper.funcs = &nouveau_fbcon_helper_funcs;
ret = drm_fb_helper_init(dev, &nfbdev->helper,
- dev->mode_config.num_crtc, 4);
+ nv_two_heads(dev) ? 2 : 1, 4);
if (ret) {
kfree(nfbdev);
return ret;
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc
index 219850d..abc3662 100644
--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc
@@ -119,9 +119,9 @@
// mthd 0x030c-0x0340, various stuff
.b16 0xc3 14
.b32 #ctx_src_address_high ~0x000000ff
-.b32 #ctx_src_address_low ~0xffffffff
+.b32 #ctx_src_address_low ~0xfffffff0
.b32 #ctx_dst_address_high ~0x000000ff
-.b32 #ctx_dst_address_low ~0xffffffff
+.b32 #ctx_dst_address_low ~0xfffffff0
.b32 #ctx_src_pitch ~0x0007ffff
.b32 #ctx_dst_pitch ~0x0007ffff
.b32 #ctx_xcnt ~0x0000ffff
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
index 37d6de3..1f33fbd 100644
--- a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
@@ -1,72 +1,4 @@
-u32 nva3_pcopy_data[] = {
-/* 0x0000: ctx_object */
- 0x00000000,
-/* 0x0004: ctx_dma */
-/* 0x0004: ctx_dma_query */
- 0x00000000,
-/* 0x0008: ctx_dma_src */
- 0x00000000,
-/* 0x000c: ctx_dma_dst */
- 0x00000000,
-/* 0x0010: ctx_query_address_high */
- 0x00000000,
-/* 0x0014: ctx_query_address_low */
- 0x00000000,
-/* 0x0018: ctx_query_counter */
- 0x00000000,
-/* 0x001c: ctx_src_address_high */
- 0x00000000,
-/* 0x0020: ctx_src_address_low */
- 0x00000000,
-/* 0x0024: ctx_src_pitch */
- 0x00000000,
-/* 0x0028: ctx_src_tile_mode */
- 0x00000000,
-/* 0x002c: ctx_src_xsize */
- 0x00000000,
-/* 0x0030: ctx_src_ysize */
- 0x00000000,
-/* 0x0034: ctx_src_zsize */
- 0x00000000,
-/* 0x0038: ctx_src_zoff */
- 0x00000000,
-/* 0x003c: ctx_src_xoff */
- 0x00000000,
-/* 0x0040: ctx_src_yoff */
- 0x00000000,
-/* 0x0044: ctx_src_cpp */
- 0x00000000,
-/* 0x0048: ctx_dst_address_high */
- 0x00000000,
-/* 0x004c: ctx_dst_address_low */
- 0x00000000,
-/* 0x0050: ctx_dst_pitch */
- 0x00000000,
-/* 0x0054: ctx_dst_tile_mode */
- 0x00000000,
-/* 0x0058: ctx_dst_xsize */
- 0x00000000,
-/* 0x005c: ctx_dst_ysize */
- 0x00000000,
-/* 0x0060: ctx_dst_zsize */
- 0x00000000,
-/* 0x0064: ctx_dst_zoff */
- 0x00000000,
-/* 0x0068: ctx_dst_xoff */
- 0x00000000,
-/* 0x006c: ctx_dst_yoff */
- 0x00000000,
-/* 0x0070: ctx_dst_cpp */
- 0x00000000,
-/* 0x0074: ctx_format */
- 0x00000000,
-/* 0x0078: ctx_swz_const0 */
- 0x00000000,
-/* 0x007c: ctx_swz_const1 */
- 0x00000000,
-/* 0x0080: ctx_xcnt */
- 0x00000000,
-/* 0x0084: ctx_ycnt */
+uint32_t nva3_pcopy_data[] = {
0x00000000,
0x00000000,
0x00000000,
@@ -98,7 +30,39 @@
0x00000000,
0x00000000,
0x00000000,
-/* 0x0100: dispatch_table */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
0x00010000,
0x00000000,
0x00000000,
@@ -109,7 +73,6 @@
0x00010162,
0x00000000,
0x00030060,
-/* 0x0128: dispatch_dma */
0x00010170,
0x00000000,
0x00010170,
@@ -155,11 +118,11 @@
0x0000001c,
0xffffff00,
0x00000020,
- 0x00000000,
+ 0x0000000f,
0x00000048,
0xffffff00,
0x0000004c,
- 0x00000000,
+ 0x0000000f,
0x00000024,
0xfff80000,
0x00000050,
@@ -183,8 +146,7 @@
0x00000800,
};
-u32 nva3_pcopy_code[] = {
-/* 0x0000: main */
+uint32_t nva3_pcopy_code[] = {
0x04fe04bd,
0x3517f000,
0xf10010fe,
@@ -196,31 +158,23 @@
0x17f11031,
0x27f01200,
0x0012d003,
-/* 0x002f: spin */
0xf40031f4,
0x0ef40028,
-/* 0x0035: ih */
0x8001cffd,
0xf40812c4,
0x21f4060b,
-/* 0x0041: ih_no_chsw */
0x0412c472,
0xf4060bf4,
-/* 0x004a: ih_no_cmd */
0x11c4c321,
0x4001d00c,
-/* 0x0052: swctx */
0x47f101f8,
0x4bfe7700,
0x0007fe00,
0xf00204b9,
0x01f40643,
0x0604fa09,
-/* 0x006b: swctx_load */
0xfa060ef4,
-/* 0x006e: swctx_done */
0x03f80504,
-/* 0x0072: chsw */
0x27f100f8,
0x23cf1400,
0x1e3fc800,
@@ -229,22 +183,18 @@
0x1e3af052,
0xf00023d0,
0x24d00147,
-/* 0x0093: chsw_no_unload */
0xcf00f880,
0x3dc84023,
0x220bf41e,
0xf40131f4,
0x57f05221,
0x0367f004,
-/* 0x00a8: chsw_load_ctx_dma */
0xa07856bc,
0xb6018068,
0x87d00884,
0x0162b600,
-/* 0x00bb: chsw_finish_load */
0xf0f018f4,
0x23d00237,
-/* 0x00c3: dispatch */
0xf100f880,
0xcf190037,
0x33cf4032,
@@ -252,7 +202,6 @@
0x1024b607,
0x010057f1,
0x74bd64bd,
-/* 0x00dc: dispatch_loop */
0x58005658,
0x50b60157,
0x0446b804,
@@ -262,7 +211,6 @@
0xb60276bb,
0x57bb0374,
0xdf0ef400,
-/* 0x0100: dispatch_valid_mthd */
0xb60246bb,
0x45bb0344,
0x01459800,
@@ -272,41 +220,31 @@
0xb0014658,
0x1bf40064,
0x00538009,
-/* 0x0127: dispatch_cmd */
0xf4300ef4,
0x55f90132,
0xf40c01f4,
-/* 0x0132: dispatch_invalid_bitfield */
0x25f0250e,
-/* 0x0135: dispatch_illegal_mthd */
0x0125f002,
-/* 0x0138: dispatch_error */
0x100047f1,
0xd00042d0,
0x27f04043,
0x0002d040,
-/* 0x0148: hostirq_wait */
0xf08002cf,
0x24b04024,
0xf71bf400,
-/* 0x0154: dispatch_done */
0x1d0027f1,
0xd00137f0,
0x00f80023,
-/* 0x0160: cmd_nop */
-/* 0x0162: cmd_pm_trigger */
0x27f100f8,
0x34bd2200,
0xd00233f0,
0x00f80023,
-/* 0x0170: cmd_dma */
0x012842b7,
0xf00145b6,
0x43801e39,
0x0040b701,
0x0644b606,
0xf80043d0,
-/* 0x0189: cmd_exec_set_format */
0xf030f400,
0xb00001b0,
0x01b00101,
@@ -318,26 +256,20 @@
0x70b63847,
0x0232f401,
0x94bd84bd,
-/* 0x01b4: ncomp_loop */
0xb60f4ac4,
0xb4bd0445,
-/* 0x01bc: bpc_loop */
0xf404a430,
0xa5ff0f18,
0x00cbbbc0,
0xf40231f4,
-/* 0x01ce: cmp_c0 */
0x1bf4220e,
0x10c7f00c,
0xf400cbbb,
-/* 0x01da: cmp_c1 */
0xa430160e,
0x0c18f406,
0xbb14c7f0,
0x0ef400cb,
-/* 0x01e9: cmp_zero */
0x80c7f107,
-/* 0x01ed: bpc_next */
0x01c83800,
0xb60180b6,
0xb5b801b0,
@@ -348,7 +280,6 @@
0x98110680,
0x68fd2008,
0x0502f400,
-/* 0x0216: dst_xcnt */
0x75fd64bd,
0x1c078000,
0xf10078fd,
@@ -373,7 +304,6 @@
0x980056d0,
0x56d01f06,
0x1030f440,
-/* 0x0276: cmd_exec_set_surface_tiled */
0x579800f8,
0x6879c70a,
0xb66478c7,
@@ -381,11 +311,9 @@
0x0e76b060,
0xf0091bf4,
0x0ef40477,
-/* 0x0291: xtile64 */
0x027cf00f,
0xfd1170b6,
0x77f00947,
-/* 0x029d: xtileok */
0x0f5a9806,
0xfd115b98,
0xb7f000ab,
@@ -443,7 +371,6 @@
0x67d00600,
0x0060b700,
0x0068d004,
-/* 0x0382: cmd_exec_set_surface_linear */
0x6cf000f8,
0x0260b702,
0x0864b602,
@@ -454,16 +381,13 @@
0xb70067d0,
0x98040060,
0x67d00957,
-/* 0x03ab: cmd_exec_wait */
0xf900f800,
0xf110f900,
0xb6080007,
-/* 0x03b6: loop */
0x01cf0604,
0x0114f000,
0xfcfa1bf4,
0xf800fc10,
-/* 0x03c5: cmd_exec_query */
0x0d34c800,
0xf5701bf4,
0xf103ab21,
@@ -493,7 +417,6 @@
0x47f10153,
0x44b60800,
0x0045d006,
-/* 0x0438: query_counter */
0x03ab21f5,
0x080c47f1,
0x980644b6,
@@ -516,13 +439,11 @@
0x47f10153,
0x44b60800,
0x0045d006,
-/* 0x0492: cmd_exec */
0x21f500f8,
0x3fc803ab,
0x0e0bf400,
0x018921f5,
0x020047f1,
-/* 0x04a7: cmd_exec_no_format */
0xf11e0ef4,
0xb6081067,
0x77f00664,
@@ -530,24 +451,19 @@
0x981c0780,
0x67d02007,
0x4067d000,
-/* 0x04c2: cmd_exec_init_src_surface */
0x32f444bd,
0xc854bd02,
0x0bf4043f,
0x8221f50a,
0x0a0ef403,
-/* 0x04d4: src_tiled */
0x027621f5,
-/* 0x04db: cmd_exec_init_dst_surface */
0xf40749f0,
0x57f00231,
0x083fc82c,
0xf50a0bf4,
0xf4038221,
-/* 0x04ee: dst_tiled */
0x21f50a0e,
0x49f00276,
-/* 0x04f5: cmd_exec_kick */
0x0057f108,
0x0654b608,
0xd0210698,
@@ -557,8 +473,6 @@
0xc80054d0,
0x0bf40c3f,
0xc521f507,
-/* 0x0519: cmd_exec_done */
-/* 0x051b: cmd_wrcache_flush */
0xf100f803,
0xbd220027,
0x0133f034,
diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
index cd879f3..a8d1745 100644
--- a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
+++ b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
@@ -1,65 +1,4 @@
-u32 nvc0_pcopy_data[] = {
-/* 0x0000: ctx_object */
- 0x00000000,
-/* 0x0004: ctx_query_address_high */
- 0x00000000,
-/* 0x0008: ctx_query_address_low */
- 0x00000000,
-/* 0x000c: ctx_query_counter */
- 0x00000000,
-/* 0x0010: ctx_src_address_high */
- 0x00000000,
-/* 0x0014: ctx_src_address_low */
- 0x00000000,
-/* 0x0018: ctx_src_pitch */
- 0x00000000,
-/* 0x001c: ctx_src_tile_mode */
- 0x00000000,
-/* 0x0020: ctx_src_xsize */
- 0x00000000,
-/* 0x0024: ctx_src_ysize */
- 0x00000000,
-/* 0x0028: ctx_src_zsize */
- 0x00000000,
-/* 0x002c: ctx_src_zoff */
- 0x00000000,
-/* 0x0030: ctx_src_xoff */
- 0x00000000,
-/* 0x0034: ctx_src_yoff */
- 0x00000000,
-/* 0x0038: ctx_src_cpp */
- 0x00000000,
-/* 0x003c: ctx_dst_address_high */
- 0x00000000,
-/* 0x0040: ctx_dst_address_low */
- 0x00000000,
-/* 0x0044: ctx_dst_pitch */
- 0x00000000,
-/* 0x0048: ctx_dst_tile_mode */
- 0x00000000,
-/* 0x004c: ctx_dst_xsize */
- 0x00000000,
-/* 0x0050: ctx_dst_ysize */
- 0x00000000,
-/* 0x0054: ctx_dst_zsize */
- 0x00000000,
-/* 0x0058: ctx_dst_zoff */
- 0x00000000,
-/* 0x005c: ctx_dst_xoff */
- 0x00000000,
-/* 0x0060: ctx_dst_yoff */
- 0x00000000,
-/* 0x0064: ctx_dst_cpp */
- 0x00000000,
-/* 0x0068: ctx_format */
- 0x00000000,
-/* 0x006c: ctx_swz_const0 */
- 0x00000000,
-/* 0x0070: ctx_swz_const1 */
- 0x00000000,
-/* 0x0074: ctx_xcnt */
- 0x00000000,
-/* 0x0078: ctx_ycnt */
+uint32_t nvc0_pcopy_data[] = {
0x00000000,
0x00000000,
0x00000000,
@@ -94,7 +33,36 @@
0x00000000,
0x00000000,
0x00000000,
-/* 0x0100: dispatch_table */
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
+ 0x00000000,
0x00010000,
0x00000000,
0x00000000,
@@ -143,11 +111,11 @@
0x00000010,
0xffffff00,
0x00000014,
- 0x00000000,
+ 0x0000000f,
0x0000003c,
0xffffff00,
0x00000040,
- 0x00000000,
+ 0x0000000f,
0x00000018,
0xfff80000,
0x00000044,
@@ -171,8 +139,7 @@
0x00000800,
};
-u32 nvc0_pcopy_code[] = {
-/* 0x0000: main */
+uint32_t nvc0_pcopy_code[] = {
0x04fe04bd,
0x3517f000,
0xf10010fe,
@@ -184,20 +151,15 @@
0x17f11031,
0x27f01200,
0x0012d003,
-/* 0x002f: spin */
0xf40031f4,
0x0ef40028,
-/* 0x0035: ih */
0x8001cffd,
0xf40812c4,
0x21f4060b,
-/* 0x0041: ih_no_chsw */
0x0412c4ca,
0xf5070bf4,
-/* 0x004b: ih_no_cmd */
0xc4010221,
0x01d00c11,
-/* 0x0053: swctx */
0xf101f840,
0xfe770047,
0x47f1004b,
@@ -226,11 +188,8 @@
0xf00204b9,
0x01f40643,
0x0604fa09,
-/* 0x00c3: swctx_load */
0xfa060ef4,
-/* 0x00c6: swctx_done */
0x03f80504,
-/* 0x00ca: chsw */
0x27f100f8,
0x23cf1400,
0x1e3fc800,
@@ -239,22 +198,18 @@
0x1e3af053,
0xf00023d0,
0x24d00147,
-/* 0x00eb: chsw_no_unload */
0xcf00f880,
0x3dc84023,
0x090bf41e,
0xf40131f4,
-/* 0x00fa: chsw_finish_load */
0x37f05321,
0x8023d002,
-/* 0x0102: dispatch */
0x37f100f8,
0x32cf1900,
0x0033cf40,
0x07ff24e4,
0xf11024b6,
0xbd010057,
-/* 0x011b: dispatch_loop */
0x5874bd64,
0x57580056,
0x0450b601,
@@ -264,7 +219,6 @@
0xbb0f08f4,
0x74b60276,
0x0057bb03,
-/* 0x013f: dispatch_valid_mthd */
0xbbdf0ef4,
0x44b60246,
0x0045bb03,
@@ -275,33 +229,24 @@
0x64b00146,
0x091bf400,
0xf4005380,
-/* 0x0166: dispatch_cmd */
0x32f4300e,
0xf455f901,
0x0ef40c01,
-/* 0x0171: dispatch_invalid_bitfield */
0x0225f025,
-/* 0x0174: dispatch_illegal_mthd */
-/* 0x0177: dispatch_error */
0xf10125f0,
0xd0100047,
0x43d00042,
0x4027f040,
-/* 0x0187: hostirq_wait */
0xcf0002d0,
0x24f08002,
0x0024b040,
-/* 0x0193: dispatch_done */
0xf1f71bf4,
0xf01d0027,
0x23d00137,
-/* 0x019f: cmd_nop */
0xf800f800,
-/* 0x01a1: cmd_pm_trigger */
0x0027f100,
0xf034bd22,
0x23d00233,
-/* 0x01af: cmd_exec_set_format */
0xf400f800,
0x01b0f030,
0x0101b000,
@@ -313,26 +258,20 @@
0x3847c701,
0xf40170b6,
0x84bd0232,
-/* 0x01da: ncomp_loop */
0x4ac494bd,
0x0445b60f,
-/* 0x01e2: bpc_loop */
0xa430b4bd,
0x0f18f404,
0xbbc0a5ff,
0x31f400cb,
0x220ef402,
-/* 0x01f4: cmp_c0 */
0xf00c1bf4,
0xcbbb10c7,
0x160ef400,
-/* 0x0200: cmp_c1 */
0xf406a430,
0xc7f00c18,
0x00cbbb14,
-/* 0x020f: cmp_zero */
0xf1070ef4,
-/* 0x0213: bpc_next */
0x380080c7,
0x80b601c8,
0x01b0b601,
@@ -344,7 +283,6 @@
0x1d08980e,
0xf40068fd,
0x64bd0502,
-/* 0x023c: dst_xcnt */
0x800075fd,
0x78fd1907,
0x1057f100,
@@ -369,18 +307,15 @@
0x1c069800,
0xf44056d0,
0x00f81030,
-/* 0x029c: cmd_exec_set_surface_tiled */
0xc7075798,
0x78c76879,
0x0380b664,
0xb06077c7,
0x1bf40e76,
0x0477f009,
-/* 0x02b7: xtile64 */
0xf00f0ef4,
0x70b6027c,
0x0947fd11,
-/* 0x02c3: xtileok */
0x980677f0,
0x5b980c5a,
0x00abfd0e,
@@ -439,7 +374,6 @@
0xb70067d0,
0xd0040060,
0x00f80068,
-/* 0x03a8: cmd_exec_set_surface_linear */
0xb7026cf0,
0xb6020260,
0x57980864,
@@ -450,15 +384,12 @@
0x0060b700,
0x06579804,
0xf80067d0,
-/* 0x03d1: cmd_exec_wait */
0xf900f900,
0x0007f110,
0x0604b608,
-/* 0x03dc: loop */
0xf00001cf,
0x1bf40114,
0xfc10fcfa,
-/* 0x03eb: cmd_exec_query */
0xc800f800,
0x1bf40d34,
0xd121f570,
@@ -488,7 +419,6 @@
0x0153f026,
0x080047f1,
0xd00644b6,
-/* 0x045e: query_counter */
0x21f50045,
0x47f103d1,
0x44b6080c,
@@ -512,13 +442,11 @@
0x080047f1,
0xd00644b6,
0x00f80045,
-/* 0x04b8: cmd_exec */
0x03d121f5,
0xf4003fc8,
0x21f50e0b,
0x47f101af,
0x0ef40200,
-/* 0x04cd: cmd_exec_no_format */
0x1067f11e,
0x0664b608,
0x800177f0,
@@ -526,23 +454,18 @@
0x1d079819,
0xd00067d0,
0x44bd4067,
-/* 0x04e8: cmd_exec_init_src_surface */
0xbd0232f4,
0x043fc854,
0xf50a0bf4,
0xf403a821,
-/* 0x04fa: src_tiled */
0x21f50a0e,
0x49f0029c,
-/* 0x0501: cmd_exec_init_dst_surface */
0x0231f407,
0xc82c57f0,
0x0bf4083f,
0xa821f50a,
0x0a0ef403,
-/* 0x0514: dst_tiled */
0x029c21f5,
-/* 0x051b: cmd_exec_kick */
0xf10849f0,
0xb6080057,
0x06980654,
@@ -552,9 +475,7 @@
0x54d00546,
0x0c3fc800,
0xf5070bf4,
-/* 0x053f: cmd_exec_done */
0xf803eb21,
-/* 0x0541: cmd_wrcache_flush */
0x0027f100,
0xf034bd22,
0x23d00133,
diff --git a/drivers/gpu/drm/nouveau/nvd0_display.c b/drivers/gpu/drm/nouveau/nvd0_display.c
index 8a555fb..0247250 100644
--- a/drivers/gpu/drm/nouveau/nvd0_display.c
+++ b/drivers/gpu/drm/nouveau/nvd0_display.c
@@ -790,7 +790,7 @@
struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
int ch = EVO_CURS(nv_crtc->index);
- evo_piow(crtc->dev, ch, 0x0084, (y << 16) | (x & 0xffff));
+ evo_piow(crtc->dev, ch, 0x0084, (y << 16) | x);
evo_piow(crtc->dev, ch, 0x0080, 0x00000000);
return 0;
}
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index a53ca30..af1054f 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -259,7 +259,7 @@
/* adjust pm to dpms changes BEFORE enabling crtcs */
radeon_pm_compute_clocks(rdev);
/* disable crtc pair power gating before programming */
- if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set)
+ if (ASIC_IS_DCE6(rdev))
atombios_powergate_crtc(crtc, ATOM_DISABLE);
atombios_enable_crtc(crtc, ATOM_ENABLE);
if (ASIC_IS_DCE3(rdev) && !ASIC_IS_DCE6(rdev))
@@ -279,7 +279,7 @@
atombios_enable_crtc(crtc, ATOM_DISABLE);
radeon_crtc->enabled = false;
/* power gating is per-pair */
- if (ASIC_IS_DCE6(rdev) && !radeon_crtc->in_mode_set) {
+ if (ASIC_IS_DCE6(rdev)) {
struct drm_crtc *other_crtc;
struct radeon_crtc *other_radeon_crtc;
list_for_each_entry(other_crtc, &rdev->ddev->mode_config.crtc_list, head) {
@@ -1634,28 +1634,18 @@
static void atombios_crtc_prepare(struct drm_crtc *crtc)
{
struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
- struct drm_device *dev = crtc->dev;
- struct radeon_device *rdev = dev->dev_private;
- radeon_crtc->in_mode_set = true;
/* pick pll */
radeon_crtc->pll_id = radeon_atom_pick_pll(crtc);
- /* disable crtc pair power gating before programming */
- if (ASIC_IS_DCE6(rdev))
- atombios_powergate_crtc(crtc, ATOM_DISABLE);
-
atombios_lock_crtc(crtc, ATOM_ENABLE);
atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
}
static void atombios_crtc_commit(struct drm_crtc *crtc)
{
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
-
atombios_crtc_dpms(crtc, DRM_MODE_DPMS_ON);
atombios_lock_crtc(crtc, ATOM_DISABLE);
- radeon_crtc->in_mode_set = false;
}
static void atombios_crtc_disable(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 886b41f..c57d856 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -22,7 +22,6 @@
*
* Authors: Dave Airlie
* Alex Deucher
- * Jerome Glisse
*/
#include "drmP.h"
#include "radeon_drm.h"
@@ -638,6 +637,7 @@
ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS,
link_status, DP_LINK_STATUS_SIZE, 100);
if (ret <= 0) {
+ DRM_ERROR("displayport link status failed\n");
return false;
}
@@ -816,10 +816,8 @@
else
mdelay(dp_info->rd_interval * 4);
- if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
- DRM_ERROR("displayport link status failed\n");
+ if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status))
break;
- }
if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
clock_recovery = true;
@@ -881,10 +879,8 @@
else
mdelay(dp_info->rd_interval * 4);
- if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status)) {
- DRM_ERROR("displayport link status failed\n");
+ if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status))
break;
- }
if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
channel_eq = true;
diff --git a/drivers/gpu/drm/radeon/atombios_encoders.c b/drivers/gpu/drm/radeon/atombios_encoders.c
index a3ae788..2d39f99 100644
--- a/drivers/gpu/drm/radeon/atombios_encoders.c
+++ b/drivers/gpu/drm/radeon/atombios_encoders.c
@@ -1392,18 +1392,10 @@
case DRM_MODE_DPMS_ON:
/* some early dce3.2 boards have a bug in their transmitter control table */
if ((rdev->family == CHIP_RV710) || (rdev->family == CHIP_RV730) ||
- ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev)) {
- if (ASIC_IS_DCE6(rdev)) {
- /* It seems we need to call ATOM_ENCODER_CMD_SETUP again
- * before reenabling encoder on DPMS ON, otherwise we never
- * get picture
- */
- atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
- }
+ ASIC_IS_DCE41(rdev) || ASIC_IS_DCE5(rdev))
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
- } else {
+ else
atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE_OUTPUT, 0, 0);
- }
if (ENCODER_MODE_IS_DP(atombios_get_encoder_mode(encoder)) && connector) {
if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
atombios_set_edp_panel_power(connector,
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index e5328da..cfa372c 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1029,11 +1029,6 @@
WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
- if ((rdev->family == CHIP_JUNIPER) ||
- (rdev->family == CHIP_CYPRESS) ||
- (rdev->family == CHIP_HEMLOCK) ||
- (rdev->family == CHIP_BARTS))
- WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
}
WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
@@ -1117,8 +1112,24 @@
void evergreen_mc_stop(struct radeon_device *rdev, struct evergreen_mc_save *save)
{
+ save->vga_control[0] = RREG32(D1VGA_CONTROL);
+ save->vga_control[1] = RREG32(D2VGA_CONTROL);
save->vga_render_control = RREG32(VGA_RENDER_CONTROL);
save->vga_hdp_control = RREG32(VGA_HDP_CONTROL);
+ save->crtc_control[0] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET);
+ save->crtc_control[1] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET);
+ if (rdev->num_crtc >= 4) {
+ save->vga_control[2] = RREG32(EVERGREEN_D3VGA_CONTROL);
+ save->vga_control[3] = RREG32(EVERGREEN_D4VGA_CONTROL);
+ save->crtc_control[2] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET);
+ save->crtc_control[3] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET);
+ }
+ if (rdev->num_crtc >= 6) {
+ save->vga_control[4] = RREG32(EVERGREEN_D5VGA_CONTROL);
+ save->vga_control[5] = RREG32(EVERGREEN_D6VGA_CONTROL);
+ save->crtc_control[4] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET);
+ save->crtc_control[5] = RREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET);
+ }
/* Stop all video */
WREG32(VGA_RENDER_CONTROL, 0);
@@ -1229,6 +1240,47 @@
/* Unlock host access */
WREG32(VGA_HDP_CONTROL, save->vga_hdp_control);
mdelay(1);
+ /* Restore video state */
+ WREG32(D1VGA_CONTROL, save->vga_control[0]);
+ WREG32(D2VGA_CONTROL, save->vga_control[1]);
+ if (rdev->num_crtc >= 4) {
+ WREG32(EVERGREEN_D3VGA_CONTROL, save->vga_control[2]);
+ WREG32(EVERGREEN_D4VGA_CONTROL, save->vga_control[3]);
+ }
+ if (rdev->num_crtc >= 6) {
+ WREG32(EVERGREEN_D5VGA_CONTROL, save->vga_control[4]);
+ WREG32(EVERGREEN_D6VGA_CONTROL, save->vga_control[5]);
+ }
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 1);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 1);
+ if (rdev->num_crtc >= 4) {
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 1);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 1);
+ }
+ if (rdev->num_crtc >= 6) {
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 1);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 1);
+ }
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC0_REGISTER_OFFSET, save->crtc_control[0]);
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC1_REGISTER_OFFSET, save->crtc_control[1]);
+ if (rdev->num_crtc >= 4) {
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC2_REGISTER_OFFSET, save->crtc_control[2]);
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC3_REGISTER_OFFSET, save->crtc_control[3]);
+ }
+ if (rdev->num_crtc >= 6) {
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC4_REGISTER_OFFSET, save->crtc_control[4]);
+ WREG32(EVERGREEN_CRTC_CONTROL + EVERGREEN_CRTC5_REGISTER_OFFSET, save->crtc_control[5]);
+ }
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC0_REGISTER_OFFSET, 0);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC1_REGISTER_OFFSET, 0);
+ if (rdev->num_crtc >= 4) {
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC2_REGISTER_OFFSET, 0);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC3_REGISTER_OFFSET, 0);
+ }
+ if (rdev->num_crtc >= 6) {
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC4_REGISTER_OFFSET, 0);
+ WREG32(EVERGREEN_CRTC_UPDATE_LOCK + EVERGREEN_CRTC5_REGISTER_OFFSET, 0);
+ }
WREG32(VGA_RENDER_CONTROL, save->vga_render_control);
}
@@ -2084,20 +2136,9 @@
/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
if (rdev->flags & RADEON_IS_IGP)
rdev->config.evergreen.tile_config |= 1 << 4;
- else {
- switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
- case 0: /* four banks */
- rdev->config.evergreen.tile_config |= 0 << 4;
- break;
- case 1: /* eight banks */
- rdev->config.evergreen.tile_config |= 1 << 4;
- break;
- case 2: /* sixteen banks */
- default:
- rdev->config.evergreen.tile_config |= 2 << 4;
- break;
- }
- }
+ else
+ rdev->config.evergreen.tile_config |=
+ ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
rdev->config.evergreen.tile_config |=
((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) << 8;
rdev->config.evergreen.tile_config |=
@@ -2129,9 +2170,9 @@
WREG32(CC_SYS_RB_BACKEND_DISABLE, rb);
WREG32(GC_USER_RB_BACKEND_DISABLE, rb);
WREG32(CC_GC_SHADER_PIPE_CONFIG, sp);
- }
+ }
- grbm_gfx_index = INSTANCE_BROADCAST_WRITES | SE_BROADCAST_WRITES;
+ grbm_gfx_index |= SE_BROADCAST_WRITES;
WREG32(GRBM_GFX_INDEX, grbm_gfx_index);
WREG32(RLC_GFX_INDEX, grbm_gfx_index);
@@ -2161,9 +2202,6 @@
smx_dc_ctl0 |= NUMBER_OF_SETS(rdev->config.evergreen.sx_num_of_sets);
WREG32(SMX_DC_CTL0, smx_dc_ctl0);
- if (rdev->family <= CHIP_SUMO2)
- WREG32(SMX_SAR_CTL0, 0x00010000);
-
WREG32(SX_EXPORT_BUFFER_SIZES, (COLOR_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_size / 4) - 1) |
POSITION_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_pos_size / 4) - 1) |
SMX_BUFFER_SIZE((rdev->config.evergreen.sx_max_export_smx_size / 4) - 1)));
diff --git a/drivers/gpu/drm/radeon/evergreen_cs.c b/drivers/gpu/drm/radeon/evergreen_cs.c
index ea69dae..70089d3 100644
--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@@ -52,7 +52,6 @@
u32 cb_color_view[12];
u32 cb_color_pitch[12];
u32 cb_color_slice[12];
- u32 cb_color_slice_idx[12];
u32 cb_color_attrib[12];
u32 cb_color_cmask_slice[8];/* unused */
u32 cb_color_fmask_slice[8];/* unused */
@@ -128,14 +127,12 @@
track->cb_color_info[i] = 0;
track->cb_color_view[i] = 0xFFFFFFFF;
track->cb_color_pitch[i] = 0;
- track->cb_color_slice[i] = 0xfffffff;
- track->cb_color_slice_idx[i] = 0;
+ track->cb_color_slice[i] = 0;
}
track->cb_target_mask = 0xFFFFFFFF;
track->cb_shader_mask = 0xFFFFFFFF;
track->cb_dirty = true;
- track->db_depth_slice = 0xffffffff;
track->db_depth_view = 0xFFFFC000;
track->db_depth_size = 0xFFFFFFFF;
track->db_depth_control = 0xFFFFFFFF;
@@ -253,9 +250,10 @@
{
struct evergreen_cs_track *track = p->track;
unsigned palign, halign, tileb, slice_pt;
- unsigned mtile_pr, mtile_ps, mtileb;
tileb = 64 * surf->bpe * surf->nsamples;
+ palign = track->group_size / (8 * surf->bpe * surf->nsamples);
+ palign = MAX(8, palign);
slice_pt = 1;
if (tileb > surf->tsplit) {
slice_pt = tileb / surf->tsplit;
@@ -264,10 +262,7 @@
/* macro tile width & height */
palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
- mtileb = (palign / 8) * (halign / 8) * tileb;;
- mtile_pr = surf->nbx / palign;
- mtile_ps = (mtile_pr * surf->nby) / halign;
- surf->layer_size = mtile_ps * mtileb * slice_pt;
+ surf->layer_size = surf->nbx * surf->nby * surf->bpe * slice_pt;
surf->base_align = (palign / 8) * (halign / 8) * tileb;
surf->palign = palign;
surf->halign = halign;
@@ -439,39 +434,6 @@
offset += surf.layer_size * mslice;
if (offset > radeon_bo_size(track->cb_color_bo[id])) {
- /* old ddx are broken they allocate bo with w*h*bpp but
- * program slice with ALIGN(h, 8), catch this and patch
- * command stream.
- */
- if (!surf.mode) {
- volatile u32 *ib = p->ib->ptr;
- unsigned long tmp, nby, bsize, size, min = 0;
-
- /* find the height the ddx wants */
- if (surf.nby > 8) {
- min = surf.nby - 8;
- }
- bsize = radeon_bo_size(track->cb_color_bo[id]);
- tmp = track->cb_color_bo_offset[id] << 8;
- for (nby = surf.nby; nby > min; nby--) {
- size = nby * surf.nbx * surf.bpe * surf.nsamples;
- if ((tmp + size * mslice) <= bsize) {
- break;
- }
- }
- if (nby > min) {
- surf.nby = nby;
- slice = ((nby * surf.nbx) / 64) - 1;
- if (!evergreen_surface_check(p, &surf, "cb")) {
- /* check if this one works */
- tmp += surf.layer_size * mslice;
- if (tmp <= bsize) {
- ib[track->cb_color_slice_idx[id]] = slice;
- goto old_ddx_ok;
- }
- }
- }
- }
dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
"offset %d, max layer %d, bo size %ld, slice %d)\n",
__func__, __LINE__, id, surf.layer_size,
@@ -484,7 +446,6 @@
surf.tsplit, surf.mtilea);
return -EINVAL;
}
-old_ddx_ok:
return 0;
}
@@ -1571,7 +1532,6 @@
case CB_COLOR7_SLICE:
tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
- track->cb_color_slice_idx[tmp] = idx;
track->cb_dirty = true;
break;
case CB_COLOR8_SLICE:
@@ -1580,7 +1540,6 @@
case CB_COLOR11_SLICE:
tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
- track->cb_color_slice_idx[tmp] = idx;
track->cb_dirty = true;
break;
case CB_COLOR0_ATTRIB:
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index f62ccd3..b4eefc3 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -232,7 +232,6 @@
#define MC_VM_MD_L1_TLB0_CNTL 0x2654
#define MC_VM_MD_L1_TLB1_CNTL 0x2658
#define MC_VM_MD_L1_TLB2_CNTL 0x265C
-#define MC_VM_MD_L1_TLB3_CNTL 0x2698
#define FUS_MC_VM_MD_L1_TLB0_CNTL 0x265C
#define FUS_MC_VM_MD_L1_TLB1_CNTL 0x2660
@@ -273,7 +272,6 @@
#define SCRATCH_UMSK 0x8540
#define SCRATCH_ADDR 0x8544
-#define SMX_SAR_CTL0 0xA008
#define SMX_DC_CTL0 0xA020
#define USE_HASH_FUNCTION (1 << 0)
#define NUMBER_OF_SETS(x) ((x) << 1)
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 9934c9d..a48ca53 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -657,28 +657,15 @@
rdev->config.cayman.max_pipes_per_simd = 4;
rdev->config.cayman.max_tile_pipes = 2;
if ((rdev->pdev->device == 0x9900) ||
- (rdev->pdev->device == 0x9901) ||
- (rdev->pdev->device == 0x9905) ||
- (rdev->pdev->device == 0x9906) ||
- (rdev->pdev->device == 0x9907) ||
- (rdev->pdev->device == 0x9908) ||
- (rdev->pdev->device == 0x9909) ||
- (rdev->pdev->device == 0x9910) ||
- (rdev->pdev->device == 0x9917)) {
+ (rdev->pdev->device == 0x9901)) {
rdev->config.cayman.max_simds_per_se = 6;
rdev->config.cayman.max_backends_per_se = 2;
} else if ((rdev->pdev->device == 0x9903) ||
- (rdev->pdev->device == 0x9904) ||
- (rdev->pdev->device == 0x990A) ||
- (rdev->pdev->device == 0x9913) ||
- (rdev->pdev->device == 0x9918)) {
+ (rdev->pdev->device == 0x9904)) {
rdev->config.cayman.max_simds_per_se = 4;
rdev->config.cayman.max_backends_per_se = 2;
- } else if ((rdev->pdev->device == 0x9919) ||
- (rdev->pdev->device == 0x9990) ||
- (rdev->pdev->device == 0x9991) ||
- (rdev->pdev->device == 0x9994) ||
- (rdev->pdev->device == 0x99A0)) {
+ } else if ((rdev->pdev->device == 0x9990) ||
+ (rdev->pdev->device == 0x9991)) {
rdev->config.cayman.max_simds_per_se = 3;
rdev->config.cayman.max_backends_per_se = 1;
} else {
@@ -878,21 +865,10 @@
/* num banks is 8 on all fusion asics. 0 = 4, 1 = 8, 2 = 16 */
if (rdev->flags & RADEON_IS_IGP)
- rdev->config.cayman.tile_config |= 1 << 4;
- else {
- switch ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) {
- case 0: /* four banks */
- rdev->config.cayman.tile_config |= 0 << 4;
- break;
- case 1: /* eight banks */
- rdev->config.cayman.tile_config |= 1 << 4;
- break;
- case 2: /* sixteen banks */
- default:
- rdev->config.cayman.tile_config |= 2 << 4;
- break;
- }
- }
+ rdev->config.evergreen.tile_config |= 1 << 4;
+ else
+ rdev->config.cayman.tile_config |=
+ ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
rdev->config.cayman.tile_config |=
((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
rdev->config.cayman.tile_config |=
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index b1ff9cc..c8187c4 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -1906,7 +1906,6 @@
WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
NUM_CLIP_SEQ(3)));
WREG32(PA_SC_ENHANCE, FORCE_EOV_MAX_CLK_CNT(4095));
- WREG32(VC_ENHANCE, 0);
}
diff --git a/drivers/gpu/drm/radeon/r600_audio.c b/drivers/gpu/drm/radeon/r600_audio.c
index 24e3939..ba66f30 100644
--- a/drivers/gpu/drm/radeon/r600_audio.c
+++ b/drivers/gpu/drm/radeon/r600_audio.c
@@ -239,7 +239,6 @@
struct radeon_device *rdev = dev->dev_private;
struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(encoder->crtc);
int base_rate = 48000;
switch (radeon_encoder->encoder_id) {
@@ -265,8 +264,8 @@
WREG32(EVERGREEN_AUDIO_PLL1_DIV, clock * 10);
WREG32(EVERGREEN_AUDIO_PLL1_UNK, 0x00000071);
- /* Select DTO source */
- WREG32(0x5ac, radeon_crtc->crtc_id);
+ /* Some magic trigger or src sel? */
+ WREG32_P(0x5ac, 0x01, ~0x77);
} else {
switch (dig->dig_encoder) {
case 0:
diff --git a/drivers/gpu/drm/radeon/r600d.h b/drivers/gpu/drm/radeon/r600d.h
index 12ceb82..59f9c99 100644
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@@ -483,7 +483,6 @@
#define TC_L2_SIZE(x) ((x)<<5)
#define L2_DISABLE_LATE_HIT (1<<9)
-#define VC_ENHANCE 0x9714
#define VGT_CACHE_INVALIDATION 0x88C4
#define CACHE_INVALIDATION(x) ((x)<<0)
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index 665df87..3d9f9f1 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -262,10 +262,13 @@
* rv515
*/
struct rv515_mc_save {
+ u32 d1vga_control;
+ u32 d2vga_control;
u32 vga_render_control;
u32 vga_hdp_control;
+ u32 d1crtc_control;
+ u32 d2crtc_control;
};
-
int rv515_init(struct radeon_device *rdev);
void rv515_fini(struct radeon_device *rdev);
uint32_t rv515_mc_rreg(struct radeon_device *rdev, uint32_t reg);
@@ -398,10 +401,11 @@
* evergreen
*/
struct evergreen_mc_save {
+ u32 vga_control[6];
u32 vga_render_control;
u32 vga_hdp_control;
+ u32 crtc_control[6];
};
-
void evergreen_pcie_gart_tlb_flush(struct radeon_device *rdev);
int evergreen_init(struct radeon_device *rdev);
void evergreen_fini(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_atombios.c b/drivers/gpu/drm/radeon/radeon_atombios.c
index b1e3820..f6e69b8 100644
--- a/drivers/gpu/drm/radeon/radeon_atombios.c
+++ b/drivers/gpu/drm/radeon/radeon_atombios.c
@@ -444,9 +444,7 @@
*/
if ((dev->pdev->device == 0x9498) &&
(dev->pdev->subsystem_vendor == 0x1682) &&
- (dev->pdev->subsystem_device == 0x2452) &&
- (i2c_bus->valid == false) &&
- !(supported_device & (ATOM_DEVICE_TV_SUPPORT | ATOM_DEVICE_CV_SUPPORT))) {
+ (dev->pdev->subsystem_device == 0x2452)) {
struct radeon_device *rdev = dev->dev_private;
*i2c_bus = radeon_lookup_i2c_gpio(rdev, 0x93);
}
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 3fb7ca9..3c2e7a0 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -64,33 +64,14 @@
/* just deal with DP (not eDP) here. */
if (connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) {
- struct radeon_connector_atom_dig *dig_connector =
- radeon_connector->con_priv;
+ int saved_dpms = connector->dpms;
- /* if existing sink type was not DP no need to retrain */
- if (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT)
- return;
-
- /* first get sink type as it may be reset after (un)plug */
- dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector);
- /* don't do anything if sink is not display port, i.e.,
- * passive dp->(dvi|hdmi) adaptor
- */
- if (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
- int saved_dpms = connector->dpms;
- /* Only turn off the display if it's physically disconnected */
- if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
- } else if (radeon_dp_needs_link_train(radeon_connector)) {
- /* set it to OFF so that drm_helper_connector_dpms()
- * won't return immediately since the current state
- * is ON at this point.
- */
- connector->dpms = DRM_MODE_DPMS_OFF;
- drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
- }
- connector->dpms = saved_dpms;
- }
+ /* Only turn off the display if it's physically disconnected */
+ if (!radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+ else if (radeon_dp_needs_link_train(radeon_connector))
+ drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+ connector->dpms = saved_dpms;
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index cf723c4..5cac832 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -158,7 +158,6 @@
return 0;
}
-/* XXX: note that this is called from the legacy UMS CS ioctl as well */
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
struct drm_radeon_cs *cs = data;
@@ -253,25 +252,23 @@
}
}
- /* these are KMS only */
- if (p->rdev) {
- if ((p->cs_flags & RADEON_CS_USE_VM) &&
- !p->rdev->vm_manager.enabled) {
- DRM_ERROR("VM not active on asic!\n");
- return -EINVAL;
- }
-
- /* we only support VM on SI+ */
- if ((p->rdev->family >= CHIP_TAHITI) &&
- ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
- DRM_ERROR("VM required on SI+!\n");
- return -EINVAL;
- }
-
- if (radeon_cs_get_ring(p, ring, priority))
- return -EINVAL;
+ if ((p->cs_flags & RADEON_CS_USE_VM) &&
+ !p->rdev->vm_manager.enabled) {
+ DRM_ERROR("VM not active on asic!\n");
+ return -EINVAL;
}
+ /* we only support VM on SI+ */
+ if ((p->rdev->family >= CHIP_TAHITI) &&
+ ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
+ DRM_ERROR("VM required on SI+!\n");
+ return -EINVAL;
+ }
+
+ if (radeon_cs_get_ring(p, ring, priority))
+ return -EINVAL;
+
+
/* deal with non-vm */
if ((p->chunk_ib_idx != -1) &&
((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
@@ -377,7 +374,7 @@
if (r) {
DRM_ERROR("Failed to schedule IB !\n");
}
- return r;
+ return 0;
}
static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index 711e95a..42acc64 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -262,14 +262,8 @@
if (!(cursor_end & 0x7f))
w--;
}
- if (w <= 0) {
+ if (w <= 0)
w = 1;
- cursor_end = x - xorigin + w;
- if (!(cursor_end & 0x7f)) {
- x--;
- WARN_ON_ONCE(x < 0);
- }
- }
}
}
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 15250fb..ef7bb3f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -57,10 +57,9 @@
* 2.13.0 - virtual memory support, streamout
* 2.14.0 - add evergreen tiling informations
* 2.15.0 - add max_pipes query
- * 2.16.0 - fix evergreen 2D tiled surface calculation
*/
#define KMS_DRIVER_MAJOR 2
-#define KMS_DRIVER_MINOR 16
+#define KMS_DRIVER_MINOR 15
#define KMS_DRIVER_PATCHLEVEL 0
int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index 2a4c592..c58a036 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -289,9 +289,8 @@
rdev->vm_manager.enabled = false;
/* mark first vm as always in use, it's the system one */
- /* allocate enough for 2 full VM pts */
r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
- rdev->vm_manager.max_pfn * 8 * 2,
+ rdev->vm_manager.max_pfn * 8,
RADEON_GEM_DOMAIN_VRAM);
if (r) {
dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
@@ -479,18 +478,12 @@
mutex_lock(&vm->mutex);
if (last_pfn > vm->last_pfn) {
- /* release mutex and lock in right order */
- mutex_unlock(&vm->mutex);
+ /* grow va space 32M by 32M */
+ unsigned align = ((32 << 20) >> 12) - 1;
radeon_mutex_lock(&rdev->cs_mutex);
- mutex_lock(&vm->mutex);
- /* and check again */
- if (last_pfn > vm->last_pfn) {
- /* grow va space 32M by 32M */
- unsigned align = ((32 << 20) >> 12) - 1;
- radeon_vm_unbind_locked(rdev, vm);
- vm->last_pfn = (last_pfn + align) & ~align;
- }
+ radeon_vm_unbind_locked(rdev, vm);
radeon_mutex_unlock(&rdev->cs_mutex);
+ vm->last_pfn = (last_pfn + align) & ~align;
}
head = &vm->va;
last_offset = 0;
@@ -604,8 +597,8 @@
if (bo_va == NULL)
return 0;
- radeon_mutex_lock(&rdev->cs_mutex);
mutex_lock(&vm->mutex);
+ radeon_mutex_lock(&rdev->cs_mutex);
radeon_vm_bo_update_pte(rdev, vm, bo, NULL);
radeon_mutex_unlock(&rdev->cs_mutex);
list_del(&bo_va->vm_list);
@@ -636,15 +629,7 @@
mutex_init(&vm->mutex);
INIT_LIST_HEAD(&vm->list);
INIT_LIST_HEAD(&vm->va);
- /* SI requires equal sized PTs for all VMs, so always set
- * last_pfn to max_pfn. cayman allows variable sized
- * pts so we can grow then as needed. Once we switch
- * to two level pts we can unify this again.
- */
- if (rdev->family >= CHIP_TAHITI)
- vm->last_pfn = rdev->vm_manager.max_pfn;
- else
- vm->last_pfn = 0;
+ vm->last_pfn = 0;
/* map the ib pool buffer at 0 in virtual address space, set
* read only
*/
@@ -658,8 +643,9 @@
struct radeon_bo_va *bo_va, *tmp;
int r;
- radeon_mutex_lock(&rdev->cs_mutex);
mutex_lock(&vm->mutex);
+
+ radeon_mutex_lock(&rdev->cs_mutex);
radeon_vm_unbind_locked(rdev, vm);
radeon_mutex_unlock(&rdev->cs_mutex);
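
The "grow va space 32M by 32M" hunk in radeon_gart.c above rounds last_pfn up to a 32 MB granule of GPU pages using a mask built from ((32 << 20) >> 12) - 1. A tiny standalone check of that arithmetic, illustrative only and not part of the patch; the sample last_pfn value is made up:

#include <assert.h>
#include <stdio.h>

int main(void)
{
	/* 32 MB expressed in 4 KB pages, minus one, gives the round-up mask. */
	unsigned align = ((32u << 20) >> 12) - 1;	/* 8191 */
	unsigned last_pfn = 10000;			/* requested size, in pages */
	unsigned rounded = (last_pfn + align) & ~align;

	assert(align == 8191);
	assert(rounded == 16384);			/* next 32 MB boundary */
	printf("%u pages round up to %u pages (%u MB)\n",
	       last_pfn, rounded, rounded / 256);
	return 0;
}
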
diff --git a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
index 9760e5a..210317c 100644
--- a/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
+++ b/drivers/gpu/drm/radeon/radeon_legacy_crtc.c
@@ -1025,11 +1025,9 @@
static void radeon_crtc_prepare(struct drm_crtc *crtc)
{
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_crtc *crtci;
- radeon_crtc->in_mode_set = true;
/*
* The hardware wedges sometimes if you reconfigure one CRTC
* whilst another is running (see fdo bug #24611).
@@ -1040,7 +1038,6 @@
static void radeon_crtc_commit(struct drm_crtc *crtc)
{
- struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
struct drm_device *dev = crtc->dev;
struct drm_crtc *crtci;
@@ -1051,7 +1048,6 @@
if (crtci->enabled)
radeon_crtc_dpms(crtci, DRM_MODE_DPMS_ON);
}
- radeon_crtc->in_mode_set = false;
}
static const struct drm_crtc_helper_funcs legacy_helper_funcs = {
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 778c1f0..48dae40 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -266,7 +266,6 @@
u16 lut_r[256], lut_g[256], lut_b[256];
bool enabled;
bool can_tile;
- bool in_mode_set;
uint32_t crtc_offset;
struct drm_gem_object *cursor_bo;
uint64_t cursor_addr;
@@ -440,6 +439,9 @@
struct radeon_i2c_chan *ddc_bus;
/* some systems have an hdmi and vga port with a shared ddc line */
bool shared_ddc;
+ /* for some Radeon chip families we apply an additional EDID header
+ check as part of the DDC probe */
+ bool requires_extended_probe;
bool use_digital;
/* we need to mind the EDID between detect
and get modes due to analog/digital/tvencoder */
@@ -527,7 +529,8 @@
u8 val);
extern void radeon_router_select_ddc_port(struct radeon_connector *radeon_connector);
extern void radeon_router_select_cd_port(struct radeon_connector *radeon_connector);
-extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector);
+extern bool radeon_ddc_probe(struct radeon_connector *radeon_connector,
+ bool requires_extended_probe);
extern int radeon_ddc_get_modes(struct radeon_connector *radeon_connector);
extern struct drm_encoder *radeon_best_encoder(struct drm_connector *connector);
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 80c6e8b..df6a4db 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -136,6 +136,7 @@
acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
sizeof(struct radeon_bo));
+retry:
bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
if (bo == NULL)
return -ENOMEM;
@@ -149,8 +150,6 @@
bo->surface_reg = -1;
INIT_LIST_HEAD(&bo->list);
INIT_LIST_HEAD(&bo->va);
-
-retry:
radeon_ttm_placement_from_domain(bo, domain);
/* Kernel allocation are uninterruptible */
mutex_lock(&rdev->vram_mutex);
diff --git a/drivers/gpu/drm/radeon/rv515.c b/drivers/gpu/drm/radeon/rv515.c
index 43af363..d8d78fe 100644
--- a/drivers/gpu/drm/radeon/rv515.c
+++ b/drivers/gpu/drm/radeon/rv515.c
@@ -281,8 +281,12 @@
void rv515_mc_stop(struct radeon_device *rdev, struct rv515_mc_save *save)
{
+ save->d1vga_control = RREG32(R_000330_D1VGA_CONTROL);
+ save->d2vga_control = RREG32(R_000338_D2VGA_CONTROL);
save->vga_render_control = RREG32(R_000300_VGA_RENDER_CONTROL);
save->vga_hdp_control = RREG32(R_000328_VGA_HDP_CONTROL);
+ save->d1crtc_control = RREG32(R_006080_D1CRTC_CONTROL);
+ save->d2crtc_control = RREG32(R_006880_D2CRTC_CONTROL);
/* Stop all video */
WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
@@ -307,6 +311,15 @@
/* Unlock host access */
WREG32(R_000328_VGA_HDP_CONTROL, save->vga_hdp_control);
mdelay(1);
+ /* Restore video state */
+ WREG32(R_000330_D1VGA_CONTROL, save->d1vga_control);
+ WREG32(R_000338_D2VGA_CONTROL, save->d2vga_control);
+ WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 1);
+ WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 1);
+ WREG32(R_006080_D1CRTC_CONTROL, save->d1crtc_control);
+ WREG32(R_006880_D2CRTC_CONTROL, save->d2crtc_control);
+ WREG32(R_0060E8_D1CRTC_UPDATE_LOCK, 0);
+ WREG32(R_0068E8_D2CRTC_UPDATE_LOCK, 0);
WREG32(R_000300_VGA_RENDER_CONTROL, save->vga_render_control);
}
diff --git a/drivers/gpu/drm/radeon/rv770.c b/drivers/gpu/drm/radeon/rv770.c
index 591040b..cdab1ae 100644
--- a/drivers/gpu/drm/radeon/rv770.c
+++ b/drivers/gpu/drm/radeon/rv770.c
@@ -151,8 +151,6 @@
WREG32(MC_VM_MD_L1_TLB0_CNTL, tmp);
WREG32(MC_VM_MD_L1_TLB1_CNTL, tmp);
WREG32(MC_VM_MD_L1_TLB2_CNTL, tmp);
- if (rdev->family == CHIP_RV740)
- WREG32(MC_VM_MD_L1_TLB3_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB0_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB1_CNTL, tmp);
WREG32(MC_VM_MB_L1_TLB2_CNTL, tmp);
@@ -691,12 +689,8 @@
if (rdev->family == CHIP_RV770)
gb_tiling_config |= BANK_TILING(1);
- else {
- if ((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT)
- gb_tiling_config |= BANK_TILING(1);
- else
- gb_tiling_config |= BANK_TILING(0);
- }
+ else
+ gb_tiling_config |= BANK_TILING((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT);
rdev->config.rv770.tiling_nbanks = 4 << ((gb_tiling_config >> 4) & 0x3);
gb_tiling_config |= GROUP_SIZE((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT);
if ((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT)
@@ -782,9 +776,6 @@
ACK_FLUSH_CTL(3) |
SYNC_FLUSH_CTL));
- if (rdev->family != CHIP_RV770)
- WREG32(SMX_SAR_CTL0, 0x00003f3f);
-
db_debug3 = RREG32(DB_DEBUG3);
db_debug3 &= ~DB_CLK_OFF_DELAY(0x1f);
switch (rdev->family) {
@@ -963,7 +954,7 @@
WREG32(PA_CL_ENHANCE, (CLIP_VTX_REORDER_ENA |
NUM_CLIP_SEQ(3)));
- WREG32(VC_ENHANCE, 0);
+
}
void r700_vram_gtt_location(struct radeon_device *rdev, struct radeon_mc *mc)
diff --git a/drivers/gpu/drm/radeon/rv770d.h b/drivers/gpu/drm/radeon/rv770d.h
index 7095a71..79fa588 100644
--- a/drivers/gpu/drm/radeon/rv770d.h
+++ b/drivers/gpu/drm/radeon/rv770d.h
@@ -174,7 +174,6 @@
#define MC_VM_MD_L1_TLB0_CNTL 0x2654
#define MC_VM_MD_L1_TLB1_CNTL 0x2658
#define MC_VM_MD_L1_TLB2_CNTL 0x265C
-#define MC_VM_MD_L1_TLB3_CNTL 0x2698
#define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR 0x203C
#define MC_VM_SYSTEM_APERTURE_HIGH_ADDR 0x2038
#define MC_VM_SYSTEM_APERTURE_LOW_ADDR 0x2034
@@ -208,7 +207,6 @@
#define SCRATCH_UMSK 0x8540
#define SCRATCH_ADDR 0x8544
-#define SMX_SAR_CTL0 0xA008
#define SMX_DC_CTL0 0xA020
#define USE_HASH_FUNCTION (1 << 0)
#define CACHE_DEPTH(x) ((x) << 1)
@@ -308,8 +306,6 @@
#define TCP_CNTL 0x9610
#define TCP_CHAN_STEER 0x9614
-#define VC_ENHANCE 0x9714
-
#define VGT_CACHE_INVALIDATION 0x88C4
#define CACHE_INVALIDATION(x) ((x)<<0)
#define VC_ONLY 0
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 2af1ce6..27bda98 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -2527,12 +2527,12 @@
WREG32(0x15DC, 0);
/* empty context1-15 */
- /* FIXME start with 4G, once using 2 level pt switch to full
+ /* FIXME start with 1G, once using 2 level pt switch to full
* vm size space
*/
/* set vm size, must be a multiple of 4 */
WREG32(VM_CONTEXT1_PAGE_TABLE_START_ADDR, 0);
- WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, rdev->vm_manager.max_pfn);
+ WREG32(VM_CONTEXT1_PAGE_TABLE_END_ADDR, (1 << 30) / RADEON_GPU_PAGE_SIZE);
for (i = 1; i < 16; i++) {
if (i < 8)
WREG32(VM_CONTEXT0_PAGE_TABLE_BASE_ADDR + (i << 2),
diff --git a/drivers/gpu/drm/sis/sis_drv.c b/drivers/gpu/drm/sis/sis_drv.c
index dd14cd1..30d98d1 100644
--- a/drivers/gpu/drm/sis/sis_drv.c
+++ b/drivers/gpu/drm/sis/sis_drv.c
@@ -47,9 +47,9 @@
if (dev_priv == NULL)
return -ENOMEM;
- idr_init(&dev_priv->object_idr);
dev->dev_private = (void *)dev_priv;
dev_priv->chipset = chipset;
+ idr_init(&dev->object_name_idr);
return 0;
}
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 8b73ae8..1f5c67c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1193,7 +1193,6 @@
(*destroy)(bo);
else
kfree(bo);
- ttm_mem_global_free(mem_glob, acc_size);
return -EINVAL;
}
bo->destroy = destroy;
@@ -1295,14 +1294,22 @@
struct ttm_buffer_object **p_bo)
{
struct ttm_buffer_object *bo;
+ struct ttm_mem_global *mem_glob = bdev->glob->mem_glob;
size_t acc_size;
int ret;
- bo = kzalloc(sizeof(*bo), GFP_KERNEL);
- if (unlikely(bo == NULL))
- return -ENOMEM;
-
acc_size = ttm_bo_acc_size(bdev, size, sizeof(struct ttm_buffer_object));
+ ret = ttm_mem_global_alloc(mem_glob, acc_size, false, false);
+ if (unlikely(ret != 0))
+ return ret;
+
+ bo = kzalloc(sizeof(*bo), GFP_KERNEL);
+
+ if (unlikely(bo == NULL)) {
+ ttm_mem_global_free(mem_glob, acc_size);
+ return -ENOMEM;
+ }
+
ret = ttm_bo_init(bdev, bo, size, type, placement, page_alignment,
buffer_start, interruptible,
persistent_swap_storage, acc_size, NULL);
@@ -1814,7 +1821,6 @@
spin_unlock(&glob->lru_lock);
(void) ttm_bo_cleanup_refs(bo, false, false, false);
kref_put(&bo->list_kref, ttm_bo_release_list);
- spin_lock(&glob->lru_lock);
continue;
}
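
The ttm_bo_create() hunk above moves the ttm_mem_global_alloc() accounting in front of the object allocation, so a failed kzalloc() only has to give the accounting back. A minimal sketch of that account-then-allocate pattern, with hypothetical acct_* helpers standing in for the TTM accounting calls:

#include <stdlib.h>

struct acct { size_t used, limit; };

/* Reserve the accounting space first; fail early if over the limit. */
static int acct_alloc(struct acct *a, size_t sz)
{
	if (a->used + sz > a->limit)
		return -1;
	a->used += sz;
	return 0;
}

static void acct_free(struct acct *a, size_t sz)
{
	a->used -= sz;
}

/* Account, then allocate; undo the reservation if the allocation fails. */
static void *create_obj(struct acct *a, size_t obj_sz, size_t acc_sz)
{
	void *obj;

	if (acct_alloc(a, acc_sz))
		return NULL;

	obj = calloc(1, obj_sz);
	if (!obj)
		acct_free(a, acc_sz);
	return obj;
}

int main(void)
{
	struct acct a = { 0, 1 << 20 };
	void *p = create_obj(&a, 128, 64);

	free(p);
	return p ? 0 : 1;
}
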
diff --git a/drivers/gpu/drm/udl/udl_drv.c b/drivers/gpu/drm/udl/udl_drv.c
index 08eff0d..5367390 100644
--- a/drivers/gpu/drm/udl/udl_drv.c
+++ b/drivers/gpu/drm/udl/udl_drv.c
@@ -13,21 +13,8 @@
static struct drm_driver driver;
-/*
- * There are many DisplayLink-based graphics products, all with unique PIDs.
- * So we match on DisplayLink's VID + Vendor-Defined Interface Class (0xff)
- * We also require a match on SubClass (0x00) and Protocol (0x00),
- * which is compatible with all known USB 2.0 era graphics chips and firmware,
- * but allows DisplayLink to increment those for any future incompatible chips
- */
static struct usb_device_id id_table[] = {
- {.idVendor = 0x17e9, .bInterfaceClass = 0xff,
- .bInterfaceSubClass = 0x00,
- .bInterfaceProtocol = 0x00,
- .match_flags = USB_DEVICE_ID_MATCH_VENDOR |
- USB_DEVICE_ID_MATCH_INT_CLASS |
- USB_DEVICE_ID_MATCH_INT_SUBCLASS |
- USB_DEVICE_ID_MATCH_INT_PROTOCOL,},
+ {.idVendor = 0x17e9, .match_flags = USB_DEVICE_ID_MATCH_VENDOR,},
{},
};
MODULE_DEVICE_TABLE(usb, id_table);
diff --git a/drivers/gpu/drm/via/via_map.c b/drivers/gpu/drm/via/via_map.c
index c126182..1f18225 100644
--- a/drivers/gpu/drm/via/via_map.c
+++ b/drivers/gpu/drm/via/via_map.c
@@ -100,11 +100,12 @@
if (dev_priv == NULL)
return -ENOMEM;
- idr_init(&dev_priv->object_idr);
dev->dev_private = (void *)dev_priv;
dev_priv->chipset = chipset;
+ idr_init(&dev->object_name_idr);
+
pci_set_master(dev->pdev);
ret = drm_vblank_init(dev, 1);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
index 21ee782..51c9ba5 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_gmr.c
@@ -66,7 +66,7 @@
cmd += sizeof(remap_cmd) / sizeof(uint32);
for (i = 0; i < num_pages; ++i) {
- if (VMW_PPN_SIZE <= 4)
+ if (VMW_PPN_SIZE > 4)
*cmd = page_to_pfn(*pages++);
else
*((uint64_t *)cmd) = page_to_pfn(*pages++);
diff --git a/drivers/gpu/ion/Makefile b/drivers/gpu/ion/Makefile
index c9e8a94..51349f6 100644
--- a/drivers/gpu/ion/Makefile
+++ b/drivers/gpu/ion/Makefile
@@ -1,3 +1,4 @@
obj-$(CONFIG_ION) += ion.o ion_heap.o ion_system_heap.o ion_carveout_heap.o ion_iommu_heap.o ion_cp_heap.o
+obj-$(CONFIG_CMA) += ion_cma_heap.o
obj-$(CONFIG_ION_TEGRA) += tegra/
obj-$(CONFIG_ION_MSM) += msm/
diff --git a/drivers/gpu/ion/ion.c b/drivers/gpu/ion/ion.c
index c9df909..d005605 100644
--- a/drivers/gpu/ion/ion.c
+++ b/drivers/gpu/ion/ion.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -33,11 +33,20 @@
#include <linux/uaccess.h>
#include <linux/debugfs.h>
#include <linux/dma-buf.h>
+#include <linux/msm_ion.h>
#include <mach/iommu_domains.h>
#include "ion_priv.h"
#define DEBUG
+/**
+ * struct ion_device - the metadata of the ion device node
+ * @dev: the actual misc device
+ * @buffers: an rb tree of all the existing buffers
+ * @lock: lock protecting the buffers & heaps trees
+ * @heaps: list of all the heaps in the system
+ * @user_clients: list of all the clients created from userspace
+ */
struct ion_device {
struct miscdevice dev;
struct rb_root buffers;
@@ -49,6 +58,20 @@
struct dentry *debug_root;
};
+/**
+ * struct ion_client - a process/hw block local address space
+ * @node: node in the tree of all clients
+ * @dev: backpointer to ion device
+ * @handles: an rb tree of all the handles in this client
+ * @lock: lock protecting the tree of handles
+ * @heap_mask: mask of all supported heaps
+ * @name: used for debugging
+ * @task: used for debugging
+ *
+ * A client represents a list of buffers this client may access.
+ * The mutex stored here is used to protect both the tree of handles
+ * and the handles themselves, and should be held while modifying either.
+ */
struct ion_client {
struct rb_node node;
struct ion_device *dev;
@@ -61,6 +84,18 @@
struct dentry *debug_root;
};
+/**
+ * ion_handle - a client local reference to a buffer
+ * @ref: reference count
+ * @client: back pointer to the client the buffer resides in
+ * @buffer: pointer to the buffer
+ * @node: node in the client's handle rbtree
+ * @kmap_cnt: count of times this client has mapped to kernel
+ * @dmap_cnt: count of times this client has mapped for dma
+ *
+ * Modifications to node, map_cnt or mapping should be protected by the
+ * lock in the client. Other fields are never changed after initialization.
+ */
struct ion_handle {
struct kref ref;
struct ion_client *client;
@@ -72,24 +107,7 @@
static void ion_iommu_release(struct kref *kref);
-static int ion_validate_buffer_flags(struct ion_buffer *buffer,
- unsigned long flags)
-{
- if (buffer->kmap_cnt || buffer->dmap_cnt || buffer->umap_cnt ||
- buffer->iommu_map_cnt) {
- if (buffer->flags != flags) {
- pr_err("%s: buffer was already mapped with flags %lx,"
- " cannot map with flags %lx\n", __func__,
- buffer->flags, flags);
- return 1;
- }
-
- } else {
- buffer->flags = flags;
- }
- return 0;
-}
-
+/* this function should only be called while dev->lock is held */
static void ion_buffer_add(struct ion_device *dev,
struct ion_buffer *buffer)
{
@@ -170,6 +188,7 @@
return NULL;
}
+/* this function should only be called while dev->lock is held */
static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
struct ion_device *dev,
unsigned long len,
@@ -195,6 +214,7 @@
buffer->dev = dev;
buffer->size = len;
+ buffer->flags = flags;
table = buffer->heap->ops->map_dma(buffer->heap, buffer);
if (IS_ERR_OR_NULL(table)) {
@@ -209,6 +229,10 @@
return buffer;
}
+/**
+ * Check for delayed IOMMU unmapping. Also unmap any outstanding
+ * mappings which would otherwise have been leaked.
+ */
static void ion_iommu_delayed_unmap(struct ion_buffer *buffer)
{
struct ion_iommu_map *iommu_map;
@@ -229,7 +253,7 @@
__func__, iommu_map->domain_info[DI_DOMAIN_NUM],
iommu_map->domain_info[DI_PARTITION_NUM]);
}
-
+ /* set ref count to 1 to force release */
kref_init(&iommu_map->ref);
kref_put(&iommu_map->ref, ion_iommu_release);
}
@@ -371,7 +395,8 @@
}
struct ion_handle *ion_alloc(struct ion_client *client, size_t len,
- size_t align, unsigned int flags)
+ size_t align, unsigned int heap_mask,
+ unsigned int flags)
{
struct rb_node *n;
struct ion_handle *handle;
@@ -384,6 +409,12 @@
dbg_str[0] = '\0';
+ /*
+ * traverse the list of heaps available in this system in priority
+ * order. If the heap type is supported by the client, and matches the
+ * request of the caller, allocate from it. Repeat until an allocation has
+ * succeeded or all heaps have been tried.
+ */
if (WARN_ON(!len))
return ERR_PTR(-EINVAL);
@@ -392,14 +423,15 @@
mutex_lock(&dev->lock);
for (n = rb_first(&dev->heaps); n != NULL; n = rb_next(n)) {
struct ion_heap *heap = rb_entry(n, struct ion_heap, node);
-
+ /* if the client doesn't support this heap type */
if (!((1 << heap->type) & client->heap_mask))
continue;
-
- if (!((1 << heap->id) & flags))
+ /* if the caller didn't specify this heap type */
+ if (!((1 << heap->id) & heap_mask))
continue;
-
- if (secure_allocation && (heap->type != ION_HEAP_TYPE_CP))
+ /* Do not allow un-secure heap if secure is specified */
+ if (secure_allocation &&
+ (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP))
continue;
buffer = ion_buffer_create(heap, dev, len, align, flags);
if (!IS_ERR_OR_NULL(buffer))
@@ -409,13 +441,13 @@
int ret_value = snprintf(&dbg_str[dbg_str_idx],
len_left, "%s ", heap->name);
if (ret_value >= len_left) {
-
+ /* overflow */
dbg_str[MAX_DBG_STR_LEN-1] = '\0';
dbg_str_idx = MAX_DBG_STR_LEN;
} else if (ret_value >= 0) {
dbg_str_idx += ret_value;
} else {
-
+ /* error */
dbg_str[MAX_DBG_STR_LEN-1] = '\0';
}
}
@@ -433,9 +465,12 @@
return ERR_PTR(PTR_ERR(buffer));
}
- buffer->creator = client;
handle = ion_handle_create(client, buffer);
+ /*
+ * ion_buffer_create will create a buffer with a ref_cnt of 1,
+ * and ion_handle_create will take a second reference, drop one here
+ */
ion_buffer_put(buffer);
if (!IS_ERR(handle)) {
@@ -615,6 +650,10 @@
goto out;
}
+ /*
+ * If clients don't want a custom iova length, just use whatever
+ * the buffer size is
+ */
if (!iova_length)
iova_length = buffer->size;
@@ -717,8 +756,7 @@
}
EXPORT_SYMBOL(ion_unmap_iommu);
-void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle,
- unsigned long flags)
+void *ion_map_kernel(struct ion_client *client, struct ion_handle *handle)
{
struct ion_buffer *buffer;
void *vaddr;
@@ -740,11 +778,6 @@
return ERR_PTR(-ENODEV);
}
- if (ion_validate_buffer_flags(buffer, flags)) {
- mutex_unlock(&client->lock);
- return ERR_PTR(-EEXIST);
- }
-
mutex_lock(&buffer->lock);
vaddr = ion_handle_kmap_get(handle);
mutex_unlock(&buffer->lock);
@@ -766,31 +799,6 @@
}
EXPORT_SYMBOL(ion_unmap_kernel);
-static int check_vaddr_bounds(unsigned long start, unsigned long end)
-{
- struct mm_struct *mm = current->active_mm;
- struct vm_area_struct *vma;
- int ret = 1;
-
- if (end < start)
- goto out;
-
- down_read(&mm->mmap_sem);
- vma = find_vma(mm, start);
- if (vma && vma->vm_start < end) {
- if (start < vma->vm_start)
- goto out_up;
- if (end > vma->vm_end)
- goto out_up;
- ret = 0;
- }
-
-out_up:
- up_read(&mm->mmap_sem);
-out:
- return ret;
-}
-
int ion_do_cache_op(struct ion_client *client, struct ion_handle *handle,
void *uaddr, unsigned long offset, unsigned long len,
unsigned int cmd)
@@ -856,7 +864,7 @@
if (type == ION_HEAP_TYPE_SYSTEM_CONTIG ||
type == ION_HEAP_TYPE_CARVEOUT ||
- type == ION_HEAP_TYPE_CP)
+ type == (enum ion_heap_type) ION_HEAP_TYPE_CP)
seq_printf(s, " : %12lx", handle->buffer->priv_phys);
else
seq_printf(s, " : %12s", "N/A");
@@ -910,6 +918,8 @@
get_task_struct(current->group_leader);
task_lock(current->group_leader);
pid = task_pid_nr(current->group_leader);
+ /* don't bother to store task struct for kernel threads,
+ they can't be killed anyway */
if (current->group_leader->flags & PF_KTHREAD) {
put_task_struct(current->group_leader);
task = NULL;
@@ -962,7 +972,6 @@
&debug_client_fops);
mutex_unlock(&dev->lock);
- pr_info("%s: create ion_client (%s) at %p\n", __func__, client->name, client);
return client;
}
@@ -971,13 +980,11 @@
struct ion_device *dev = client->dev;
struct rb_node *n;
- pr_info("%s: destroy ion_client %p (%s)\n", __func__, client, client->name);
+ pr_debug("%s: %d\n", __func__, __LINE__);
while ((n = rb_first(&client->handles))) {
struct ion_handle *handle = rb_entry(n, struct ion_handle,
node);
- mutex_lock(&client->lock);
ion_handle_destroy(&handle->ref);
- mutex_unlock(&client->lock);
}
mutex_lock(&dev->lock);
if (client->task)
@@ -1112,7 +1119,7 @@
}
mutex_lock(&buffer->lock);
-
+ /* now map it to userspace */
ret = buffer->heap->ops->map_user(buffer->heap, buffer, vma);
if (ret) {
@@ -1124,6 +1131,10 @@
mutex_unlock(&buffer->lock);
vma->vm_ops = &ion_vm_ops;
+ /*
+ * move the buffer into the vm_private_data so we can access it
+ * from vma_open/close
+ */
vma->vm_private_data = buffer;
}
return ret;
@@ -1200,9 +1211,12 @@
{
struct ion_buffer *buffer;
bool valid_handle;
- unsigned long ion_flags = ION_SET_CACHE(CACHED);
+ unsigned long ion_flags = 0;
if (flags & O_DSYNC)
- ion_flags = ION_SET_CACHE(UNCACHED);
+ ion_flags = ION_SET_UNCACHED(ion_flags);
+ else
+ ion_flags = ION_SET_CACHED(ion_flags);
+
mutex_lock(&client->lock);
valid_handle = ion_handle_validate(client, handle);
@@ -1214,12 +1228,6 @@
buffer = handle->buffer;
- mutex_lock(&buffer->lock);
- if (ion_validate_buffer_flags(buffer, ion_flags)) {
- mutex_unlock(&buffer->lock);
- return -EEXIST;
- }
- mutex_unlock(&buffer->lock);
return 0;
}
@@ -1263,7 +1271,7 @@
dmabuf = dma_buf_get(fd);
if (IS_ERR_OR_NULL(dmabuf))
return ERR_PTR(PTR_ERR(dmabuf));
-
+ /* if this memory came from ion */
if (dmabuf->ops != &dma_buf_ops) {
pr_err("%s: can not import dmabuf from another exporter\n",
@@ -1274,7 +1282,7 @@
buffer = dmabuf->priv;
mutex_lock(&client->lock);
-
+ /* if a handle exists for this buffer just take a reference to it */
handle = ion_handle_lookup(client, buffer);
if (!IS_ERR_OR_NULL(handle)) {
ion_handle_get(handle);
@@ -1303,7 +1311,7 @@
if (copy_from_user(&data, (void __user *)arg, sizeof(data)))
return -EFAULT;
data.handle = ion_alloc(client, data.len, data.align,
- data.flags);
+ data.heap_mask, data.flags);
if (IS_ERR(data.handle))
return PTR_ERR(data.handle);
@@ -1357,8 +1365,10 @@
sizeof(struct ion_fd_data)))
return -EFAULT;
data.handle = ion_import_dma_buf(client, data.fd);
- if (IS_ERR(data.handle))
+ if (IS_ERR(data.handle)) {
+ ret = PTR_ERR(data.handle);
data.handle = NULL;
+ }
if (copy_to_user((void __user *)arg, &data,
sizeof(struct ion_fd_data)))
return -EFAULT;
@@ -1379,65 +1389,17 @@
return dev->custom_ioctl(client, data.cmd, data.arg);
}
case ION_IOC_CLEAN_CACHES:
+ return client->dev->custom_ioctl(client,
+ ION_IOC_CLEAN_CACHES, arg);
case ION_IOC_INV_CACHES:
+ return client->dev->custom_ioctl(client,
+ ION_IOC_INV_CACHES, arg);
case ION_IOC_CLEAN_INV_CACHES:
- {
- struct ion_flush_data data;
- unsigned long start, end;
- struct ion_handle *handle = NULL;
- int ret;
-
- if (copy_from_user(&data, (void __user *)arg,
- sizeof(struct ion_flush_data)))
- return -EFAULT;
-
- start = (unsigned long) data.vaddr;
- end = (unsigned long) data.vaddr + data.length;
-
- if (check_vaddr_bounds(start, end)) {
- pr_err("%s: virtual address %p is out of bounds\n",
- __func__, data.vaddr);
- return -EINVAL;
- }
-
- if (!data.handle) {
- handle = ion_import_dma_buf(client, data.fd);
- if (IS_ERR(handle)) {
- pr_info("%s: Could not import handle: %d\n",
- __func__, (int)handle);
- return -EINVAL;
- }
- }
-
- ret = ion_do_cache_op(client,
- data.handle ? data.handle : handle,
- data.vaddr, data.offset, data.length,
- cmd);
-
- if (!data.handle)
- ion_free(client, handle);
-
- if (ret < 0)
- return ret;
- break;
-
- }
+ return client->dev->custom_ioctl(client,
+ ION_IOC_CLEAN_INV_CACHES, arg);
case ION_IOC_GET_FLAGS:
- {
- struct ion_flag_data data;
- int ret;
- if (copy_from_user(&data, (void __user *)arg,
- sizeof(struct ion_flag_data)))
- return -EFAULT;
-
- ret = ion_handle_get_flags(client, data.handle, &data.flags);
- if (ret < 0)
- return ret;
- if (copy_to_user((void __user *)arg, &data,
- sizeof(struct ion_flag_data)))
- return -EFAULT;
- break;
- }
+ return client->dev->custom_ioctl(client,
+ ION_IOC_GET_FLAGS, arg);
default:
return -ENOTTY;
}
@@ -1459,15 +1421,9 @@
struct ion_device *dev = container_of(miscdev, struct ion_device, dev);
struct ion_client *client;
char debug_name[64];
- char task_comm[TASK_COMM_LEN];
pr_debug("%s: %d\n", __func__, __LINE__);
- if (current->group_leader->flags & PF_KTHREAD) {
- snprintf(debug_name, 64, "%u", task_pid_nr(current->group_leader));
- } else {
- strcpy(debug_name, get_task_comm(task_comm, current->group_leader));
- }
-
+ snprintf(debug_name, 64, "%u", task_pid_nr(current->group_leader));
client = ion_client_create(dev, -1, debug_name);
if (IS_ERR_OR_NULL(client))
return PTR_ERR(client);
@@ -1501,6 +1457,13 @@
return size;
}
+/**
+ * Searches through a client's handles to find if the buffer is owned
+ * by this client. Used for debug output.
+ * @param client pointer to candidate owner of buffer
+ * @param buf pointer to buffer that we are trying to find the owner of
+ * @return 1 if found, 0 otherwise
+ */
static int ion_debug_find_buffer_owner(const struct ion_client *client,
const struct ion_buffer *buf)
{
@@ -1516,6 +1479,12 @@
return 0;
}
+/**
+ * Adds mem_map_data pointer to the tree of mem_map
+ * Used for debug output.
+ * @param mem_map The mem_map tree
+ * @param data The new data to add to the tree
+ */
static void ion_debug_mem_map_add(struct rb_root *mem_map,
struct mem_map_data *data)
{
@@ -1540,6 +1509,12 @@
rb_insert_color(&data->node, mem_map);
}
+/**
+ * Search for an owner of a buffer by iterating over all ION clients.
+ * @param dev ion device containing pointers to all the clients.
+ * @param buffer pointer to buffer we are trying to find the owner of.
+ * @return name of owner.
+ */
const char *ion_debug_locate_owner(const struct ion_device *dev,
const struct ion_buffer *buffer)
{
@@ -1556,6 +1531,12 @@
return client_name;
}
+/**
+ * Create a mem_map of the heap.
+ * @param s seq_file to log error message to.
+ * @param heap The heap to create mem_map for.
+ * @param mem_map The mem map to be created.
+ */
void ion_debug_mem_map_create(struct seq_file *s, struct ion_heap *heap,
struct rb_root *mem_map)
{
@@ -1573,32 +1554,19 @@
"Part of memory map will not be logged\n");
break;
}
- if (heap->id == ION_IOMMU_HEAP_ID) {
- data->addr = (unsigned long)buffer;
- } else {
- data->addr = buffer->priv_phys;
- data->addr_end = buffer->priv_phys + buffer->size-1;
- }
+ data->addr = buffer->priv_phys;
+ data->addr_end = buffer->priv_phys + buffer->size-1;
data->size = buffer->size;
data->client_name = ion_debug_locate_owner(dev, buffer);
-
- {
-
- struct rb_node *p = NULL;
- struct ion_client *entry = NULL;
-
- for (p = rb_first(&dev->clients); p && !data->creator_name;
- p = rb_next(p)) {
- entry = rb_entry(p, struct ion_client, node);
- if (entry == buffer->creator)
- data->creator_name = entry->name;
- }
- }
ion_debug_mem_map_add(mem_map, data);
}
}
}
+/**
+ * Free the memory allocated by ion_debug_mem_map_create
+ * @param mem_map The mem map to free.
+ */
static void ion_debug_mem_map_destroy(struct rb_root *mem_map)
{
if (mem_map) {
@@ -1612,6 +1580,11 @@
}
}
+/**
+ * Print heap debug information.
+ * @param s seq_file to log message to.
+ * @param heap pointer to heap that we will print debug information for.
+ */
static void ion_heap_print_debug(struct seq_file *s, struct ion_heap *heap)
{
if (heap->ops->print_debug) {
@@ -1707,10 +1680,14 @@
struct rb_node *n;
int ret_val = 0;
+ /*
+ * traverse the list of heaps available in this system
+ * and find the heap that is specified.
+ */
mutex_lock(&dev->lock);
for (n = rb_first(&dev->heaps); n != NULL; n = rb_next(n)) {
struct ion_heap *heap = rb_entry(n, struct ion_heap, node);
- if (heap->type != ION_HEAP_TYPE_CP)
+ if (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP)
continue;
if (ION_HEAP(heap->id) != heap_id)
continue;
@@ -1731,10 +1708,14 @@
struct rb_node *n;
int ret_val = 0;
+ /*
+ * traverse the list of heaps available in this system
+ * and find the heap that is specified.
+ */
mutex_lock(&dev->lock);
for (n = rb_first(&dev->heaps); n != NULL; n = rb_next(n)) {
struct ion_heap *heap = rb_entry(n, struct ion_heap, node);
- if (heap->type != ION_HEAP_TYPE_CP)
+ if (heap->type != (enum ion_heap_type) ION_HEAP_TYPE_CP)
continue;
if (ION_HEAP(heap->id) != heap_id)
continue;
@@ -1755,8 +1736,8 @@
struct rb_node *n;
struct rb_node *n2;
-
- seq_printf(s, "%16.s %12.s %16.s %16.s %16.s\n", "buffer", "physical", "heap", "size",
+ /* mark all buffers as 1 */
+ seq_printf(s, "%16.s %16.s %16.s %16.s\n", "buffer", "heap", "size",
"ref cnt");
mutex_lock(&dev->lock);
for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
@@ -1766,7 +1747,7 @@
buf->marked = 1;
}
-
+ /* now see which buffers we can access */
for (n = rb_first(&dev->clients); n; n = rb_next(n)) {
struct ion_client *client = rb_entry(n, struct ion_client,
node);
@@ -1783,26 +1764,15 @@
}
-
+ /* And anyone still marked as a 1 means a leaked handle somewhere */
for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
struct ion_buffer *buf = rb_entry(n, struct ion_buffer,
node);
- enum ion_heap_type type = buf->heap->type;
- if (buf->marked == 1) {
- seq_printf(s, "%16.x", (int)buf);
-
- if (type == ION_HEAP_TYPE_SYSTEM_CONTIG ||
- type == ION_HEAP_TYPE_CARVEOUT ||
- type == ION_HEAP_TYPE_CP)
- seq_printf(s, " %12lx", buf->priv_phys);
- else
- seq_printf(s, " %12s", "N/A");
-
- seq_printf(s, " %16.s %16.x %16.d\n",
- buf->heap->name, buf->size,
+ if (buf->marked == 1)
+ seq_printf(s, "%16.x %16.s %16.x %16.d\n",
+ (int)buf, buf->heap->name, buf->size,
atomic_read(&buf->ref.refcount));
- }
}
mutex_unlock(&dev->lock);
return 0;
@@ -1861,7 +1831,7 @@
void ion_device_destroy(struct ion_device *dev)
{
misc_deregister(&dev->dev);
-
+ /* XXX need to free the heaps and clients ? */
kfree(dev);
}
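
With the ION_IOC_ALLOC change above, heap selection (heap_mask) is now passed separately from the cache/secure flags. From userspace the call would look roughly like this; the header path, heap bit and buffer size are assumptions for illustration, not taken from this patch:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/ion.h>		/* assumed location of struct ion_allocation_data */

int ion_alloc_example(void)
{
	struct ion_allocation_data data = {
		.len       = 4096,
		.align     = 4096,
		.heap_mask = 1u << 25,	/* hypothetical heap id bit */
		.flags     = 0,		/* cache/secure flags left at default */
	};
	int fd = open("/dev/ion", O_RDWR);

	if (fd < 0)
		return -1;
	if (ioctl(fd, ION_IOC_ALLOC, &data) < 0) {
		perror("ION_IOC_ALLOC");
		close(fd);
		return -1;
	}
	/* data.handle now refers to the new buffer owned by this client;
	 * closing the fd tears the client (and its handles) down again. */
	close(fd);
	return 0;
}
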
diff --git a/drivers/gpu/ion/ion_carveout_heap.c b/drivers/gpu/ion/ion_carveout_heap.c
index 5f7fe37..a808cc9 100644
--- a/drivers/gpu/ion/ion_carveout_heap.c
+++ b/drivers/gpu/ion/ion_carveout_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_carveout_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -23,7 +23,6 @@
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include <linux/iommu.h>
#include <linux/seq_file.h>
#include "ion_priv.h"
@@ -31,6 +30,7 @@
#include <mach/iommu_domains.h>
#include <asm/mach/map.h>
#include <asm/cacheflush.h>
+#include <linux/msm_ion.h>
struct ion_carveout_heap {
struct ion_heap heap;
@@ -286,19 +286,18 @@
struct rb_node *n;
seq_printf(s, "\nMemory Map\n");
- seq_printf(s, "%16.s %16.s %14.s %14.s %14.s\n",
- "client", "creator", "start address", "end address",
+ seq_printf(s, "%16.s %14.s %14.s %14.s\n",
+ "client", "start address", "end address",
"size (hex)");
for (n = rb_first(mem_map); n; n = rb_next(n)) {
struct mem_map_data *data =
rb_entry(n, struct mem_map_data, node);
const char *client_name = "(null)";
- const char *creator_name = "(null)";
if (last_end < data->addr) {
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n",
- "FREE", "NA", last_end, data->addr-1,
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
+ "FREE", last_end, data->addr-1,
data->addr-last_end,
data->addr-last_end);
}
@@ -306,17 +305,14 @@
if (data->client_name)
client_name = data->client_name;
- if (data->creator_name)
- creator_name = data->creator_name;
-
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n",
- client_name, creator_name, data->addr,
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
+ client_name, data->addr,
data->addr_end,
data->size, data->size);
last_end = data->addr_end+1;
}
if (last_end < end) {
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n", "FREE", "NA",
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n", "FREE",
last_end, end-1, end-last_end, end-last_end);
}
}
@@ -361,7 +357,7 @@
goto out1;
}
- sglist = vmalloc(sizeof(*sglist));
+ sglist = kmalloc(sizeof(*sglist), GFP_KERNEL);
if (!sglist)
goto out1;
@@ -385,13 +381,13 @@
if (ret)
goto out2;
}
- vfree(sglist);
+ kfree(sglist);
return ret;
out2:
iommu_unmap_range(domain, data->iova_addr, buffer->size);
out1:
- vfree(sglist);
+ kfree(sglist);
msm_free_iova_address(data->iova_addr, domain_num, partition_num,
data->mapped_size);
diff --git a/drivers/gpu/ion/ion_cma_heap.c b/drivers/gpu/ion/ion_cma_heap.c
new file mode 100644
index 0000000..bef6b6f
--- /dev/null
+++ b/drivers/gpu/ion/ion_cma_heap.c
@@ -0,0 +1,342 @@
+/*
+ * drivers/gpu/ion/ion_cma_heap.c
+ *
+ * Copyright (C) Linaro 2012
+ * Author: <benjamin.gaignard@linaro.org> for ST-Ericsson.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/ion.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/err.h>
+#include <linux/dma-mapping.h>
+#include <linux/msm_ion.h>
+#include <mach/iommu_domains.h>
+
+#include <asm/cacheflush.h>
+
+/* for ion_heap_ops structure */
+#include "ion_priv.h"
+
+#define ION_CMA_ALLOCATE_FAILED -1
+
+struct ion_cma_buffer_info {
+ void *cpu_addr;
+ dma_addr_t handle;
+ struct sg_table *table;
+ bool is_cached;
+};
+
+static int cma_heap_has_outer_cache;
+/*
+ * Create scatter-list for the already allocated DMA buffer.
+ * This function could be replaced by dma_common_get_sgtable
+ * as soon as it is available.
+ */
+int ion_cma_get_sgtable(struct device *dev, struct sg_table *sgt,
+ void *cpu_addr, dma_addr_t handle, size_t size)
+{
+ struct page *page = virt_to_page(cpu_addr);
+ int ret;
+
+ ret = sg_alloc_table(sgt, 1, GFP_KERNEL);
+ if (unlikely(ret))
+ return ret;
+
+ sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0);
+ return 0;
+}
+
+/* ION CMA heap operations functions */
+static int ion_cma_allocate(struct ion_heap *heap, struct ion_buffer *buffer,
+ unsigned long len, unsigned long align,
+ unsigned long flags)
+{
+ struct device *dev = heap->priv;
+ struct ion_cma_buffer_info *info;
+
+ dev_dbg(dev, "Request buffer allocation len %ld\n", len);
+
+ info = kzalloc(sizeof(struct ion_cma_buffer_info), GFP_KERNEL);
+ if (!info) {
+ dev_err(dev, "Can't allocate buffer info\n");
+ return ION_CMA_ALLOCATE_FAILED;
+ }
+
+ if (!ION_IS_CACHED(flags))
+ info->cpu_addr = dma_alloc_writecombine(dev, len,
+ &(info->handle), 0);
+ else
+ info->cpu_addr = dma_alloc_nonconsistent(dev, len,
+ &(info->handle), 0);
+
+ if (!info->cpu_addr) {
+ dev_err(dev, "Fail to allocate buffer\n");
+ goto err;
+ }
+
+ info->table = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+ if (!info->table) {
+ dev_err(dev, "Fail to allocate sg table\n");
+ goto err;
+ }
+
+ info->is_cached = ION_IS_CACHED(flags);
+
+ ion_cma_get_sgtable(dev,
+ info->table, info->cpu_addr, info->handle, len);
+
+ /* keep this for memory release */
+ buffer->priv_virt = info;
+ dev_dbg(dev, "Allocate buffer %p\n", buffer);
+ return 0;
+
+err:
+ kfree(info);
+ return ION_CMA_ALLOCATE_FAILED;
+}
+
+static void ion_cma_free(struct ion_buffer *buffer)
+{
+ struct device *dev = buffer->heap->priv;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Release buffer %p\n", buffer);
+ /* release memory */
+ dma_free_coherent(dev, buffer->size, info->cpu_addr, info->handle);
+ /* release sg table */
+ kfree(info->table);
+ kfree(info);
+}
+
+/* return physical address in addr */
+static int ion_cma_phys(struct ion_heap *heap, struct ion_buffer *buffer,
+ ion_phys_addr_t *addr, size_t *len)
+{
+ struct device *dev = heap->priv;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ dev_dbg(dev, "Return buffer %p physical address 0x%x\n", buffer,
+ info->handle);
+
+ *addr = info->handle;
+ *len = buffer->size;
+
+ return 0;
+}
+
+struct sg_table *ion_cma_heap_map_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ return info->table;
+}
+
+void ion_cma_heap_unmap_dma(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+static int ion_cma_mmap(struct ion_heap *mapper, struct ion_buffer *buffer,
+ struct vm_area_struct *vma)
+{
+ struct device *dev = buffer->heap->priv;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ if (info->is_cached)
+ return dma_mmap_nonconsistent(dev, vma, info->cpu_addr,
+ info->handle, buffer->size);
+ else
+ return dma_mmap_writecombine(dev, vma, info->cpu_addr,
+ info->handle, buffer->size);
+}
+
+static void *ion_cma_map_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ return info->cpu_addr;
+}
+
+static void ion_cma_unmap_kernel(struct ion_heap *heap,
+ struct ion_buffer *buffer)
+{
+ return;
+}
+
+int ion_cma_map_iommu(struct ion_buffer *buffer,
+ struct ion_iommu_map *data,
+ unsigned int domain_num,
+ unsigned int partition_num,
+ unsigned long align,
+ unsigned long iova_length,
+ unsigned long flags)
+{
+ int ret = 0;
+ struct iommu_domain *domain;
+ unsigned long extra;
+ unsigned long extra_iova_addr;
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+ struct sg_table *table = info->table;
+ int prot = IOMMU_WRITE | IOMMU_READ;
+
+ data->mapped_size = iova_length;
+
+ if (!msm_use_iommu()) {
+ data->iova_addr = info->handle;
+ return 0;
+ }
+
+ extra = iova_length - buffer->size;
+
+ ret = msm_allocate_iova_address(domain_num, partition_num,
+ data->mapped_size, align,
+ &data->iova_addr);
+
+ if (ret)
+ goto out;
+
+ domain = msm_get_iommu_domain(domain_num);
+
+ if (!domain) {
+ ret = -EINVAL;
+ goto out1;
+ }
+
+ ret = iommu_map_range(domain, data->iova_addr, table->sgl,
+ buffer->size, prot);
+
+ if (ret) {
+ pr_err("%s: could not map %lx in domain %p\n",
+ __func__, data->iova_addr, domain);
+ goto out1;
+ }
+
+ extra_iova_addr = data->iova_addr + buffer->size;
+ if (extra) {
+ ret = msm_iommu_map_extra(domain, extra_iova_addr, extra, SZ_4K,
+ prot);
+ if (ret)
+ goto out2;
+ }
+ return ret;
+
+out2:
+ iommu_unmap_range(domain, data->iova_addr, buffer->size);
+out1:
+ msm_free_iova_address(data->iova_addr, domain_num, partition_num,
+ data->mapped_size);
+out:
+ return ret;
+}
+
+
+void ion_cma_unmap_iommu(struct ion_iommu_map *data)
+{
+ unsigned int domain_num;
+ unsigned int partition_num;
+ struct iommu_domain *domain;
+
+ if (!msm_use_iommu())
+ return;
+
+ domain_num = iommu_map_domain(data);
+ partition_num = iommu_map_partition(data);
+
+ domain = msm_get_iommu_domain(domain_num);
+
+ if (!domain) {
+ WARN(1, "Could not get domain %d. Corruption?\n", domain_num);
+ return;
+ }
+
+ iommu_unmap_range(domain, data->iova_addr, data->mapped_size);
+ msm_free_iova_address(data->iova_addr, domain_num, partition_num,
+ data->mapped_size);
+
+ return;
+}
+
+int ion_cma_cache_ops(struct ion_heap *heap,
+ struct ion_buffer *buffer, void *vaddr,
+ unsigned int offset, unsigned int length,
+ unsigned int cmd)
+{
+ void (*outer_cache_op)(phys_addr_t, phys_addr_t);
+
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ dmac_clean_range(vaddr, vaddr + length);
+ outer_cache_op = outer_clean_range;
+ break;
+ case ION_IOC_INV_CACHES:
+ dmac_inv_range(vaddr, vaddr + length);
+ outer_cache_op = outer_inv_range;
+ break;
+ case ION_IOC_CLEAN_INV_CACHES:
+ dmac_flush_range(vaddr, vaddr + length);
+ outer_cache_op = outer_flush_range;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ if (cma_heap_has_outer_cache) {
+ struct ion_cma_buffer_info *info = buffer->priv_virt;
+
+ outer_cache_op(info->handle, info->handle + length);
+ }
+
+ return 0;
+}
+
+static struct ion_heap_ops ion_cma_ops = {
+ .allocate = ion_cma_allocate,
+ .free = ion_cma_free,
+ .map_dma = ion_cma_heap_map_dma,
+ .unmap_dma = ion_cma_heap_unmap_dma,
+ .phys = ion_cma_phys,
+ .map_user = ion_cma_mmap,
+ .map_kernel = ion_cma_map_kernel,
+ .unmap_kernel = ion_cma_unmap_kernel,
+ .map_iommu = ion_cma_map_iommu,
+ .unmap_iommu = ion_cma_unmap_iommu,
+ .cache_op = ion_cma_cache_ops,
+};
+
+struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *data)
+{
+ struct ion_heap *heap;
+
+ heap = kzalloc(sizeof(struct ion_heap), GFP_KERNEL);
+
+ if (!heap)
+ return ERR_PTR(-ENOMEM);
+
+ heap->ops = &ion_cma_ops;
+ /* set device as the heap's private data; later it is
+ * used to make the link with the reserved CMA memory */
+ heap->priv = data->priv;
+ heap->type = ION_HEAP_TYPE_DMA;
+ cma_heap_has_outer_cache = data->has_outer_cache;
+ return heap;
+}
+
+void ion_cma_heap_destroy(struct ion_heap *heap)
+{
+ kfree(heap);
+}
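
For the new CMA heap to be reachable, a board file would describe it with an ion_platform_heap entry whose type is ION_HEAP_TYPE_DMA and whose priv points at the device owning the CMA region, matching the fields ion_cma_heap_create() reads above. A hedged sketch only; the heap id, name, header and backing device below are made up:

#include <linux/platform_device.h>
#include <linux/msm_ion.h>	/* assumed header for ion_platform_heap on this tree */

/* hypothetical device that a CMA area would be reserved against */
static struct platform_device ion_cma_example_dev = {
	.name = "ion-cma-example",
	.id   = -1,
};

static struct ion_platform_heap example_heaps[] = {
	{
		.type            = ION_HEAP_TYPE_DMA,	/* dispatches to ion_cma_heap_create() */
		.id              = 25,			/* hypothetical heap id */
		.name            = "cma_example",
		.priv            = &ion_cma_example_dev.dev,
		.has_outer_cache = 1,
	},
};
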
diff --git a/drivers/gpu/ion/ion_cp_heap.c b/drivers/gpu/ion/ion_cp_heap.c
index d0d6a95..017b756 100644
--- a/drivers/gpu/ion/ion_cp_heap.c
+++ b/drivers/gpu/ion/ion_cp_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_cp_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -15,19 +15,19 @@
*
*/
#include <linux/spinlock.h>
-
+#include <linux/delay.h>
#include <linux/err.h>
#include <linux/genalloc.h>
#include <linux/io.h>
-#include <linux/ion.h>
+#include <linux/msm_ion.h>
#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
-#include <linux/vmalloc.h>
#include <linux/memory_alloc.h>
#include <linux/seq_file.h>
#include <linux/fmem.h>
#include <linux/iommu.h>
+#include <linux/dma-mapping.h>
#include <asm/mach/map.h>
@@ -41,6 +41,39 @@
#include <asm/cacheflush.h>
#include "msm/ion_cp_common.h"
+/**
+ * struct ion_cp_heap - container for the heap and shared heap data
+ *
+ * @heap: the heap information structure
+ * @pool: memory pool to allocate from.
+ * @base: the base address of the memory pool.
+ * @permission_type: Identifier for the memory used by SCM for protecting
+ * and unprotecting memory.
+ * @secure_base: Base address used when securing a heap that is shared.
+ * @secure_size: Size used when securing a heap that is shared.
+ * @lock: mutex to protect shared access.
+ * @heap_protected: Indicates whether heap has been protected or not.
+ * @allocated_bytes: the total number of allocated bytes from the pool.
+ * @total_size: the total size of the memory pool.
+ * @request_region: function pointer to call when first mapping of memory
+ * occurs.
+ * @release_region: function pointer to call when last mapping of memory
+ * unmapped.
+ * @bus_id: token used with request/release region.
+ * @kmap_cached_count: the total number of times this heap has been mapped in
+ * kernel space (cached).
+ * @kmap_uncached_count:the total number of times this heap has been mapped in
+ * kernel space (un-cached).
+ * @umap_count: the total number of times this heap has been mapped in
+ * user space.
+ * @iommu_iova: saved iova when mapping full heap at once.
+ * @iommu_partition: partition used to map full heap.
+ * @reusable: indicates if the memory should be reused via fmem.
+ * @reserved_vrange: reserved virtual address range for use with fmem
+ * @iommu_map_all: Indicates whether we should map whole heap into IOMMU.
+ * @iommu_2x_map_domain: Indicates the domain to use for overmapping.
+ * @has_outer_cache: set to 1 if outer cache is used, 0 otherwise.
+*/
struct ion_cp_heap {
struct ion_heap heap;
struct gen_pool *pool;
@@ -52,8 +85,8 @@
unsigned int heap_protected;
unsigned long allocated_bytes;
unsigned long total_size;
- int (*request_region)(void *);
- int (*release_region)(void *);
+ int (*heap_request_region)(void *);
+ int (*heap_release_region)(void *);
void *bus_id;
unsigned long kmap_cached_count;
unsigned long kmap_uncached_count;
@@ -66,6 +99,11 @@
int iommu_2x_map_domain;
unsigned int has_outer_cache;
atomic_t protect_cnt;
+ void *cpu_addr;
+ size_t heap_size;
+ dma_addr_t handle;
+ int cma;
+ int disallow_non_secure_allocation;
};
enum {
@@ -73,6 +111,8 @@
HEAP_PROTECTED = 1,
};
+#define DMA_ALLOC_RETRIES 5
+
static int ion_cp_protect_mem(unsigned int phy_base, unsigned int size,
unsigned int permission_type, int version,
void *data);
@@ -81,12 +121,120 @@
unsigned int permission_type, int version,
void *data);
+static int allocate_heap_memory(struct ion_heap *heap)
+{
+ struct device *dev = heap->priv;
+ struct ion_cp_heap *cp_heap =
+ container_of(heap, struct ion_cp_heap, heap);
+ int ret;
+ int tries = 0;
+ DEFINE_DMA_ATTRS(attrs);
+ dma_set_attr(DMA_ATTR_NO_KERNEL_MAPPING, &attrs);
+
+
+ if (cp_heap->cpu_addr)
+ return 0;
+
+ while (!cp_heap->cpu_addr && (++tries < DMA_ALLOC_RETRIES)) {
+ cp_heap->cpu_addr = dma_alloc_attrs(dev,
+ cp_heap->heap_size,
+ &(cp_heap->handle),
+ 0,
+ &attrs);
+ if (!cp_heap->cpu_addr)
+ msleep(20);
+ }
+
+ if (!cp_heap->cpu_addr)
+ goto out;
+
+ cp_heap->base = cp_heap->handle;
+
+ cp_heap->pool = gen_pool_create(12, -1);
+ if (!cp_heap->pool)
+ goto out_free;
+
+ ret = gen_pool_add(cp_heap->pool, cp_heap->base,
+ cp_heap->heap_size, -1);
+ if (ret < 0)
+ goto out_pool;
+
+ return 0;
+
+out_pool:
+ gen_pool_destroy(cp_heap->pool);
+out_free:
+ dma_free_coherent(dev, cp_heap->heap_size, cp_heap->cpu_addr,
+ cp_heap->handle);
+out:
+ return ION_CP_ALLOCATE_FAIL;
+}
+
+static void free_heap_memory(struct ion_heap *heap)
+{
+ struct device *dev = heap->priv;
+ struct ion_cp_heap *cp_heap =
+ container_of(heap, struct ion_cp_heap, heap);
+
+ /* release memory */
+ dma_free_coherent(dev, cp_heap->heap_size, cp_heap->cpu_addr,
+ cp_heap->handle);
+ gen_pool_destroy(cp_heap->pool);
+ cp_heap->pool = NULL;
+ cp_heap->cpu_addr = 0;
+}
+
+
+
+/**
+ * Get the total number of kernel mappings.
+ * Must be called with heap->lock locked.
+ */
static unsigned long ion_cp_get_total_kmap_count(
const struct ion_cp_heap *cp_heap)
{
return cp_heap->kmap_cached_count + cp_heap->kmap_uncached_count;
}
+static int ion_on_first_alloc(struct ion_heap *heap)
+{
+ struct ion_cp_heap *cp_heap =
+ container_of(heap, struct ion_cp_heap, heap);
+ int ret_value;
+
+ if (cp_heap->reusable) {
+ ret_value = fmem_set_state(FMEM_C_STATE);
+ if (ret_value)
+ return 1;
+ }
+
+ if (cp_heap->cma) {
+ ret_value = allocate_heap_memory(heap);
+ if (ret_value)
+ return 1;
+ }
+ return 0;
+}
+
+static void ion_on_last_free(struct ion_heap *heap)
+{
+ struct ion_cp_heap *cp_heap =
+ container_of(heap, struct ion_cp_heap, heap);
+
+ if (cp_heap->reusable)
+ if (fmem_set_state(FMEM_T_STATE) != 0)
+ pr_err("%s: unable to transition heap to T-state\n",
+ __func__);
+
+ if (cp_heap->cma)
+ free_heap_memory(heap);
+}
+
+/**
+ * Protects memory if the heap is an unsecured heap. Also ensures that we are in
+ * the correct FMEM state if this heap is a reusable heap.
+ * Must be called with heap->lock locked.
+ */
static int ion_cp_protect(struct ion_heap *heap, int version, void *data)
{
struct ion_cp_heap *cp_heap =
@@ -94,12 +242,10 @@
int ret_value = 0;
if (atomic_inc_return(&cp_heap->protect_cnt) == 1) {
-
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- ret_value = fmem_set_state(FMEM_C_STATE);
- if (ret_value)
+ /* Make sure we are in C state when the heap is protected. */
+ if (!cp_heap->allocated_bytes)
+ if (ion_on_first_alloc(heap))
goto out;
- }
ret_value = ion_cp_protect_mem(cp_heap->secure_base,
cp_heap->secure_size, cp_heap->permission_type,
@@ -108,11 +254,9 @@
pr_err("Failed to protect memory for heap %s - "
"error code: %d\n", heap->name, ret_value);
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state\n",
- __func__);
- }
+ if (!cp_heap->allocated_bytes)
+ ion_on_last_free(heap);
+
atomic_dec(&cp_heap->protect_cnt);
} else {
cp_heap->heap_protected = HEAP_PROTECTED;
@@ -127,6 +271,11 @@
return ret_value;
}
+/**
+ * Unprotects memory if the heap is a secure heap. Also ensures that we are in
+ * the correct FMEM state if this heap is a reusable heap.
+ * Must be called with heap->lock locked.
+ */
static void ion_cp_unprotect(struct ion_heap *heap, int version, void *data)
{
struct ion_cp_heap *cp_heap =
@@ -144,11 +293,8 @@
pr_debug("Un-protected heap %s @ 0x%x\n", heap->name,
(unsigned int) cp_heap->base);
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state",
- __func__);
- }
+ if (!cp_heap->allocated_bytes)
+ ion_on_last_free(heap);
}
}
pr_debug("%s: protect count is %d\n", __func__,
@@ -163,6 +309,7 @@
{
unsigned long offset;
unsigned long secure_allocation = flags & ION_SECURE;
+ unsigned long force_contig = flags & ION_FORCE_CONTIGUOUS;
struct ion_cp_heap *cp_heap =
container_of(heap, struct ion_cp_heap, heap);
@@ -175,6 +322,14 @@
return ION_CP_ALLOCATE_FAIL;
}
+ if (!force_contig && !secure_allocation &&
+ cp_heap->disallow_non_secure_allocation) {
+ mutex_unlock(&cp_heap->lock);
+ pr_debug("%s: non-secure allocation disallowed from this heap\n",
+ __func__);
+ return ION_CP_ALLOCATE_FAIL;
+ }
+
if (secure_allocation &&
(cp_heap->umap_count > 0 || cp_heap->kmap_cached_count > 0)) {
mutex_unlock(&cp_heap->lock);
@@ -185,12 +340,15 @@
return ION_CP_ALLOCATE_FAIL;
}
- if (cp_heap->reusable && !cp_heap->allocated_bytes) {
- if (fmem_set_state(FMEM_C_STATE) != 0) {
+ /*
+ * if this is the first reusable allocation, transition
+ * the heap
+ */
+ if (!cp_heap->allocated_bytes)
+ if (ion_on_first_alloc(heap)) {
mutex_unlock(&cp_heap->lock);
return ION_RESERVED_ALLOCATE_FAIL;
}
- }
cp_heap->allocated_bytes += size;
mutex_unlock(&cp_heap->lock);
@@ -209,13 +367,9 @@
__func__, heap->name,
cp_heap->total_size -
cp_heap->allocated_bytes, size);
-
- if (cp_heap->reusable && !cp_heap->allocated_bytes &&
- cp_heap->heap_protected == HEAP_NOT_PROTECTED) {
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state\n",
- __func__);
- }
+ if (!cp_heap->allocated_bytes &&
+ cp_heap->heap_protected == HEAP_NOT_PROTECTED)
+ ion_on_last_free(heap);
mutex_unlock(&cp_heap->lock);
return ION_CP_ALLOCATE_FAIL;
@@ -260,14 +414,11 @@
mutex_lock(&cp_heap->lock);
cp_heap->allocated_bytes -= size;
- if (cp_heap->reusable && !cp_heap->allocated_bytes &&
- cp_heap->heap_protected == HEAP_NOT_PROTECTED) {
- if (fmem_set_state(FMEM_T_STATE) != 0)
- pr_err("%s: unable to transition heap to T-state\n",
- __func__);
- }
+ if (!cp_heap->allocated_bytes &&
+ cp_heap->heap_protected == HEAP_NOT_PROTECTED)
+ ion_on_last_free(heap);
-
+ /* Unmap everything if we previously mapped the whole heap at once. */
if (!cp_heap->allocated_bytes) {
unsigned int i;
for (i = 0; i < MAX_DOMAINS; ++i) {
@@ -353,21 +504,29 @@
buffer->sg_table = 0;
}
+/**
+ * Call request region for SMI memory if this is the first mapping.
+ */
static int ion_cp_request_region(struct ion_cp_heap *cp_heap)
{
int ret_value = 0;
if ((cp_heap->umap_count + ion_cp_get_total_kmap_count(cp_heap)) == 0)
- if (cp_heap->request_region)
- ret_value = cp_heap->request_region(cp_heap->bus_id);
+ if (cp_heap->heap_request_region)
+ ret_value = cp_heap->heap_request_region(
+ cp_heap->bus_id);
return ret_value;
}
+/**
+ * Call release region for SMI memory if this is the last un-mapping.
+ */
static int ion_cp_release_region(struct ion_cp_heap *cp_heap)
{
int ret_value = 0;
if ((cp_heap->umap_count + ion_cp_get_total_kmap_count(cp_heap)) == 0)
- if (cp_heap->release_region)
- ret_value = cp_heap->release_region(cp_heap->bus_id);
+ if (cp_heap->heap_release_region)
+ ret_value = cp_heap->heap_release_region(
+ cp_heap->bus_id);
return ret_value;
}
@@ -412,7 +571,24 @@
if (cp_heap->reusable) {
ret_value = ion_map_fmem_buffer(buffer, cp_heap->base,
cp_heap->reserved_vrange, buffer->flags);
+ } else if (cp_heap->cma) {
+ int npages = PAGE_ALIGN(buffer->size) / PAGE_SIZE;
+ struct page **pages = vmalloc(
+ sizeof(struct page *) * npages);
+ int i;
+ pgprot_t pgprot;
+ if (ION_IS_CACHED(buffer->flags))
+ pgprot = PAGE_KERNEL;
+ else
+ pgprot = pgprot_writecombine(PAGE_KERNEL);
+
+ for (i = 0; i < npages; i++) {
+ pages[i] = phys_to_page(buffer->priv_phys +
+ i * PAGE_SIZE);
+ }
+ ret_value = vmap(pages, npages, VM_IOREMAP, pgprot);
+ vfree(pages);
} else {
if (ION_IS_CACHED(buffer->flags))
ret_value = ioremap_cached(buffer->priv_phys,
@@ -443,6 +619,8 @@
if (cp_heap->reusable)
unmap_kernel_range((unsigned long)buffer->vaddr, buffer->size);
+ else if (cp_heap->cma)
+ vunmap(buffer->vaddr);
else
__arm_iounmap(buffer->vaddr);
@@ -569,19 +747,18 @@
struct rb_node *n;
seq_printf(s, "\nMemory Map\n");
- seq_printf(s, "%16.s %16.s %14.s %14.s %14.s\n",
- "client", "creator", "start address", "end address",
+ seq_printf(s, "%16.s %14.s %14.s %14.s\n",
+ "client", "start address", "end address",
"size (hex)");
for (n = rb_first(mem_map); n; n = rb_next(n)) {
struct mem_map_data *data =
rb_entry(n, struct mem_map_data, node);
const char *client_name = "(null)";
- const char *creator_name = "(null)";
if (last_end < data->addr) {
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n",
- "FREE", "NA", last_end, data->addr-1,
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
+ "FREE", last_end, data->addr-1,
data->addr-last_end,
data->addr-last_end);
}
@@ -589,17 +766,14 @@
if (data->client_name)
client_name = data->client_name;
- if (data->creator_name)
- creator_name = data->creator_name;
-
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n",
- client_name, creator_name, data->addr,
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n",
+ client_name, data->addr,
data->addr_end,
data->size, data->size);
last_end = data->addr_end+1;
}
if (last_end < end) {
- seq_printf(s, "%16.s %16.s %14lx %14lx %14lu (%lx)\n", "FREE", "NA",
+ seq_printf(s, "%16.s %14lx %14lx %14lu (%lx)\n", "FREE",
last_end, end-1, end-last_end, end-last_end);
}
}
@@ -646,6 +820,9 @@
unsigned long virt_addr_len = cp_heap->total_size;
struct iommu_domain *domain = msm_get_iommu_domain(domain_num);
+ /* If we are mapping into the video domain we need to map twice the
+ * size of the heap to account for a prefetch issue in the video core.
+ */
if (domain_num == cp_heap->iommu_2x_map_domain)
virt_addr_len <<= 1;
@@ -728,7 +905,7 @@
}
if (cp_heap->iommu_iova[domain_num]) {
-
+ /* Already mapped. */
unsigned long offset = buffer->priv_phys - cp_heap->base;
data->iova_addr = cp_heap->iommu_iova[domain_num] + offset;
return 0;
@@ -740,6 +917,10 @@
data->iova_addr =
cp_heap->iommu_iova[domain_num] + offset;
cp_heap->iommu_partition[domain_num] = partition_num;
+ /*
+ clear delayed map flag so that we don't interfere
+ with this feature (we are already delaying).
+ */
data->flags &= ~ION_IOMMU_UNMAP_DELAYED;
return 0;
} else {
@@ -805,6 +986,8 @@
domain_num = iommu_map_domain(data);
+ /* If we are mapping everything we'll wait to unmap until everything
+ is freed. */
if (cp_heap->iommu_iova[domain_num])
return;
@@ -853,14 +1036,6 @@
mutex_init(&cp_heap->lock);
- cp_heap->pool = gen_pool_create(12, -1);
- if (!cp_heap->pool)
- goto free_heap;
-
- cp_heap->base = heap_data->base;
- ret = gen_pool_add(cp_heap->pool, cp_heap->base, heap_data->size, -1);
- if (ret < 0)
- goto destroy_pool;
cp_heap->allocated_bytes = 0;
cp_heap->umap_count = 0;
@@ -868,11 +1043,13 @@
cp_heap->kmap_uncached_count = 0;
cp_heap->total_size = heap_data->size;
cp_heap->heap.ops = &cp_heap_ops;
- cp_heap->heap.type = ION_HEAP_TYPE_CP;
+ cp_heap->heap.type = (enum ion_heap_type) ION_HEAP_TYPE_CP;
cp_heap->heap_protected = HEAP_NOT_PROTECTED;
- cp_heap->secure_base = cp_heap->base;
+ cp_heap->secure_base = heap_data->base;
cp_heap->secure_size = heap_data->size;
cp_heap->has_outer_cache = heap_data->has_outer_cache;
+ cp_heap->heap_size = heap_data->size;
+
atomic_set(&cp_heap->protect_cnt, 0);
if (heap_data->extra_data) {
struct ion_cp_heap_pdata *extra_data =
@@ -887,16 +1064,37 @@
if (extra_data->setup_region)
cp_heap->bus_id = extra_data->setup_region();
if (extra_data->request_region)
- cp_heap->request_region = extra_data->request_region;
+ cp_heap->heap_request_region =
+ extra_data->request_region;
if (extra_data->release_region)
- cp_heap->release_region = extra_data->release_region;
+ cp_heap->heap_release_region =
+ extra_data->release_region;
cp_heap->iommu_map_all =
extra_data->iommu_map_all;
cp_heap->iommu_2x_map_domain =
extra_data->iommu_2x_map_domain;
+ cp_heap->cma = extra_data->is_cma;
+ cp_heap->disallow_non_secure_allocation =
+ extra_data->no_nonsecure_alloc;
}
+ if (cp_heap->cma) {
+ cp_heap->pool = NULL;
+ cp_heap->cpu_addr = 0;
+ cp_heap->heap.priv = heap_data->priv;
+ } else {
+ cp_heap->pool = gen_pool_create(12, -1);
+ if (!cp_heap->pool)
+ goto free_heap;
+
+ cp_heap->base = heap_data->base;
+ ret = gen_pool_add(cp_heap->pool, cp_heap->base,
+ heap_data->size, -1);
+ if (ret < 0)
+ goto destroy_pool;
+
+ }
return &cp_heap->heap;
destroy_pool:
@@ -927,6 +1125,7 @@
*size = cp_heap->total_size;
}
+/* SCM related code for locking down memory for content protection */
#define SCM_CP_LOCK_CMD_ID 0x1
#define SCM_CP_PROTECT 0x1
diff --git a/drivers/gpu/ion/ion_heap.c b/drivers/gpu/ion/ion_heap.c
index 6ea49db..98c1a8c 100644
--- a/drivers/gpu/ion/ion_heap.c
+++ b/drivers/gpu/ion/ion_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -18,12 +18,13 @@
#include <linux/err.h>
#include <linux/ion.h>
#include "ion_priv.h"
+#include <linux/msm_ion.h>
struct ion_heap *ion_heap_create(struct ion_platform_heap *heap_data)
{
struct ion_heap *heap = NULL;
- switch (heap_data->type) {
+ switch ((int) heap_data->type) {
case ION_HEAP_TYPE_SYSTEM_CONTIG:
heap = ion_system_contig_heap_create(heap_data);
break;
@@ -39,6 +40,11 @@
case ION_HEAP_TYPE_CP:
heap = ion_cp_heap_create(heap_data);
break;
+#ifdef CONFIG_CMA
+ case ION_HEAP_TYPE_DMA:
+ heap = ion_cma_heap_create(heap_data);
+ break;
+#endif
default:
pr_err("%s: Invalid heap type %d\n", __func__,
heap_data->type);
@@ -54,6 +60,7 @@
heap->name = heap_data->name;
heap->id = heap_data->id;
+ heap->priv = heap_data->priv;
return heap;
}
@@ -62,7 +69,7 @@
if (!heap)
return;
- switch (heap->type) {
+ switch ((int) heap->type) {
case ION_HEAP_TYPE_SYSTEM_CONTIG:
ion_system_contig_heap_destroy(heap);
break;
@@ -78,6 +85,11 @@
case ION_HEAP_TYPE_CP:
ion_cp_heap_destroy(heap);
break;
+#ifdef CONFIG_CMA
+ case ION_HEAP_TYPE_DMA:
+ ion_cma_heap_destroy(heap);
+ break;
+#endif
default:
pr_err("%s: Invalid heap type %d\n", __func__,
heap->type);
diff --git a/drivers/gpu/ion/ion_iommu_heap.c b/drivers/gpu/ion/ion_iommu_heap.c
index 5f6780b..3a32390 100644
--- a/drivers/gpu/ion/ion_iommu_heap.c
+++ b/drivers/gpu/ion/ion_iommu_heap.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -12,13 +12,15 @@
*/
#include <linux/err.h>
#include <linux/io.h>
-#include <linux/ion.h>
+#include <linux/msm_ion.h>
#include <linux/mm.h>
+#include <linux/highmem.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/iommu.h>
#include <linux/pfn.h>
+#include <linux/dma-mapping.h>
#include "ion_priv.h"
#include <asm/mach/map.h>
@@ -31,40 +33,107 @@
unsigned int has_outer_cache;
};
+/*
+ * We will attempt to allocate high-order pages and store those in an
+ * sg_list. However, some APIs expect an array of struct page * where
+ * each page is of size PAGE_SIZE. We use this extra structure to
+ * carry around an array of such pages (derived from the high-order
+ * pages with nth_page).
+ */
struct ion_iommu_priv_data {
struct page **pages;
int nrpages;
unsigned long size;
};
+#define MAX_VMAP_RETRIES 10
+
atomic_t v = ATOMIC_INIT(0);
+static const unsigned int orders[] = {8, 4, 0};
+static const int num_orders = ARRAY_SIZE(orders);
+
+struct page_info {
+ struct page *page;
+ unsigned int order;
+ struct list_head list;
+};
+
+static unsigned int order_to_size(int order)
+{
+ return PAGE_SIZE << order;
+}
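+
+/*
+ * Worked example, assuming a 4K PAGE_SIZE: the orders above correspond
+ * to chunk sizes of 1MB (order 8), 64KB (order 4) and 4KB (order 0).
+ * alloc_largest_available() below walks this list and grabs the largest
+ * chunk that still fits in the remaining size.
+ */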
+
+static struct page_info *alloc_largest_available(unsigned long size,
+ unsigned int max_order)
+{
+ struct page *page;
+ struct page_info *info;
+ int i;
+
+ for (i = 0; i < num_orders; i++) {
+ if (size < order_to_size(orders[i]))
+ continue;
+ if (max_order < orders[i])
+ continue;
+
+ page = alloc_pages(GFP_KERNEL | __GFP_HIGHMEM | __GFP_COMP,
+ orders[i]);
+ if (!page)
+ continue;
+
+ info = kmalloc(sizeof(struct page_info), GFP_KERNEL);
+ info->page = page;
+ info->order = orders[i];
+ return info;
+ }
+ return NULL;
+}
+
static int ion_iommu_heap_allocate(struct ion_heap *heap,
struct ion_buffer *buffer,
unsigned long size, unsigned long align,
unsigned long flags)
{
- int ret = 0, i;
+ int ret, i;
+ struct list_head pages_list;
+ struct page_info *info, *tmp_info;
struct ion_iommu_priv_data *data = NULL;
- pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
- void *ptr = NULL;
if (msm_use_iommu()) {
struct scatterlist *sg;
struct sg_table *table;
- unsigned int i;
+ int j;
+ void *ptr = NULL;
+ unsigned int npages_to_vmap, total_pages, num_large_pages = 0;
+ long size_remaining = PAGE_ALIGN(size);
+ unsigned int max_order = orders[0];
data = kmalloc(sizeof(*data), GFP_KERNEL);
if (!data)
return -ENOMEM;
+ INIT_LIST_HEAD(&pages_list);
+ while (size_remaining > 0) {
+ info = alloc_largest_available(size_remaining,
+ max_order);
+ if (!info) {
+ ret = -ENOMEM;
+ goto err_free_data;
+ }
+ list_add_tail(&info->list, &pages_list);
+ size_remaining -= order_to_size(info->order);
+ max_order = info->order;
+ num_large_pages++;
+ }
+
data->size = PFN_ALIGN(size);
data->nrpages = data->size >> PAGE_SHIFT;
data->pages = kzalloc(sizeof(struct page *)*data->nrpages,
GFP_KERNEL);
if (!data->pages) {
ret = -ENOMEM;
- goto err1;
+ goto err_free_data;
}
table = buffer->sg_table =
@@ -74,30 +143,65 @@
ret = -ENOMEM;
goto err1;
}
- ret = sg_alloc_table(table, data->nrpages, GFP_KERNEL);
+ ret = sg_alloc_table(table, num_large_pages, GFP_KERNEL);
if (ret)
goto err2;
- for_each_sg(table->sgl, sg, table->nents, i) {
- data->pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (!data->pages[i])
- goto err3;
-
- sg_set_page(sg, data->pages[i], PAGE_SIZE, 0);
+ i = 0;
+ sg = table->sgl;
+ list_for_each_entry_safe(info, tmp_info, &pages_list, list) {
+ struct page *page = info->page;
+ sg_set_page(sg, page, order_to_size(info->order), 0);
+ sg_dma_address(sg) = sg_phys(sg);
+ sg = sg_next(sg);
+ for (j = 0; j < (1 << info->order); ++j)
+ data->pages[i++] = nth_page(page, j);
+ list_del(&info->list);
+ kfree(info);
}
- ptr = vmap(data->pages, data->nrpages, VM_IOREMAP, page_prot);
- if (ptr != NULL) {
- memset(ptr, 0, data->size);
- dmac_flush_range(ptr, ptr + data->size);
+ /*
+ * As an optimization, we omit __GFP_ZERO from
+ * alloc_page above and manually zero out all of the
+ * pages in one fell swoop here. To safeguard against
+ * insufficient vmalloc space, we only vmap
+ * `npages_to_vmap' at a time, starting with a
+ * conservative estimate of 1/8 of the total number of
+ * vmalloc pages available. Note that the `pages'
+ * array is composed of all 4K pages, irrespective of
+ * the size of the pages on the sg list.
+ */
+ npages_to_vmap = ((VMALLOC_END - VMALLOC_START)/8)
+ >> PAGE_SHIFT;
+ total_pages = data->nrpages;
+ for (i = 0; i < total_pages; i += npages_to_vmap) {
+ npages_to_vmap = min(npages_to_vmap, total_pages - i);
+ for (j = 0; j < MAX_VMAP_RETRIES && npages_to_vmap;
+ ++j) {
+ ptr = vmap(&data->pages[i], npages_to_vmap,
+ VM_IOREMAP, pgprot_kernel);
+ if (ptr)
+ break;
+ else
+ npages_to_vmap >>= 1;
+ }
+ if (!ptr) {
+ pr_err("Couldn't vmap the pages for zeroing\n");
+ ret = -ENOMEM;
+ goto err3;
+ }
+ memset(ptr, 0, npages_to_vmap * PAGE_SIZE);
vunmap(ptr);
- } else
- pr_err("%s: vmap() failed\n", __func__);
+ }
+
+ if (!ION_IS_CACHED(flags))
+ dma_sync_sg_for_device(NULL, table->sgl, table->nents,
+ DMA_BIDIRECTIONAL);
buffer->priv_virt = data;
-
+
atomic_add(data->size, &v);
-
+
return 0;
} else {
@@ -110,31 +214,40 @@
err2:
kfree(buffer->sg_table);
buffer->sg_table = 0;
-
- for (i = 0; i < data->nrpages; i++) {
- if (data->pages[i])
- __free_page(data->pages[i]);
- }
- kfree(data->pages);
err1:
+ kfree(data->pages);
+err_free_data:
kfree(data);
+
+ list_for_each_entry_safe(info, tmp_info, &pages_list, list) {
+ if (info->page)
+ __free_pages(info->page, info->order);
+ list_del(&info->list);
+ kfree(info);
+ }
return ret;
}
static void ion_iommu_heap_free(struct ion_buffer *buffer)
{
- struct ion_iommu_priv_data *data = buffer->priv_virt;
int i;
+ struct scatterlist *sg;
+ struct sg_table *table = buffer->sg_table;
+ struct ion_iommu_priv_data *data = buffer->priv_virt;
+ if (!table)
+ return;
if (!data)
return;
- for (i = 0; i < data->nrpages; i++)
- __free_page(data->pages[i]);
+ for_each_sg(table->sgl, sg, table->nents, i)
+ __free_pages(sg_page(sg), get_order(sg_dma_len(sg)));
-
+ sg_free_table(table);
+ kfree(table);
+ table = 0;
+
atomic_sub(data->size, &v);
-
kfree(data->pages);
kfree(data);
@@ -146,38 +259,6 @@
return ret;
}
-static int ion_iommu_print_debug(struct ion_heap *heap, struct seq_file *s,
- const struct rb_root *mem_map)
-{
- seq_printf(s, "Total bytes currently allocated: %d (%x)\n",
- atomic_read(&v), atomic_read(&v));
-
- if (mem_map) {
- struct rb_node *n;
-
- seq_printf(s, "\nBuffer Info\n");
- seq_printf(s, "%16.s %16.s %14.s\n",
- "client", "creator", "size (hex)");
-
- for (n = rb_first(mem_map); n; n = rb_next(n)) {
- struct mem_map_data *data =
- rb_entry(n, struct mem_map_data, node);
- const char *client_name = "(null)";
- const char *creator_name = "(null)";
-
- if (data->client_name)
- client_name = data->client_name;
-
- if (data->creator_name)
- creator_name = data->creator_name;
-
- seq_printf(s, "%16.s %16.s %14lu (%lx)\n",
- client_name, creator_name, data->size, data->size);
- }
- }
- return 0;
-}
-
void *ion_iommu_heap_map_kernel(struct ion_heap *heap,
struct ion_buffer *buffer)
{
@@ -208,21 +289,34 @@
int ion_iommu_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
struct vm_area_struct *vma)
{
- struct ion_iommu_priv_data *data = buffer->priv_virt;
+ struct sg_table *table = buffer->sg_table;
+ unsigned long addr = vma->vm_start;
+ unsigned long offset = vma->vm_pgoff * PAGE_SIZE;
+ struct scatterlist *sg;
int i;
- unsigned long curr_addr;
- if (!data)
- return -EINVAL;
if (!ION_IS_CACHED(buffer->flags))
vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
- curr_addr = vma->vm_start;
- for (i = 0; i < data->nrpages && curr_addr < vma->vm_end; i++) {
- if (vm_insert_page(vma, curr_addr, data->pages[i])) {
- return -EINVAL;
+ for_each_sg(table->sgl, sg, table->nents, i) {
+ struct page *page = sg_page(sg);
+ unsigned long remainder = vma->vm_end - addr;
+ unsigned long len = sg_dma_len(sg);
+
+ if (offset >= sg_dma_len(sg)) {
+ offset -= sg_dma_len(sg);
+ continue;
+ } else if (offset) {
+ page += offset / PAGE_SIZE;
+ len = sg_dma_len(sg) - offset;
+ offset = 0;
}
- curr_addr += PAGE_SIZE;
+ len = min(len, remainder);
+ remap_pfn_range(vma, addr, page_to_pfn(page), len,
+ vma->vm_page_prot);
+ addr += len;
+ if (addr >= vma->vm_end)
+ return 0;
}
return 0;
}
@@ -363,10 +457,6 @@
static void ion_iommu_heap_unmap_dma(struct ion_heap *heap,
struct ion_buffer *buffer)
{
- if (buffer->sg_table)
- sg_free_table(buffer->sg_table);
- kfree(buffer->sg_table);
- buffer->sg_table = 0;
}
static struct ion_heap_ops iommu_heap_ops = {
@@ -380,7 +470,6 @@
.cache_op = ion_iommu_cache_ops,
.map_dma = ion_iommu_heap_map_dma,
.unmap_dma = ion_iommu_heap_unmap_dma,
- .print_debug = ion_iommu_print_debug,
};
struct ion_heap *ion_iommu_heap_create(struct ion_platform_heap *heap_data)
diff --git a/drivers/gpu/ion/ion_priv.h b/drivers/gpu/ion/ion_priv.h
index 31a0d81..d494f7a 100644
--- a/drivers/gpu/ion/ion_priv.h
+++ b/drivers/gpu/ion/ion_priv.h
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_priv.h
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -32,6 +32,23 @@
DI_MAX,
};
+/**
+ * struct ion_iommu_map - represents a mapping of an ion buffer to an iommu
+ * @iova_addr - iommu virtual address
+ * @node - rb node to exist in the buffer's tree of iommu mappings
+ * @domain_info - contains the partition number and domain number
+ * domain_info[1] = domain number
+ * domain_info[0] = partition number
+ * @ref - for reference counting this mapping
+ * @mapped_size - size of the iova space mapped
+ * (may not be the same as the buffer size)
+ * @flags - iommu domain/partition specific flags.
+ *
+ * Represents a mapping of one ion buffer to a particular iommu domain
+ * and address range. There may exist other mappings of this buffer in
+ * different domains or address ranges. All mappings will have the same
+ * cacheability and security.
+ */
struct ion_iommu_map {
unsigned long iova_addr;
struct rb_node node;
@@ -47,12 +64,29 @@
struct ion_buffer *ion_handle_buffer(struct ion_handle *handle);
+/**
+ * struct ion_buffer - metadata for a particular buffer
+ * @ref: refernce count
+ * @node: node in the ion_device buffers tree
+ * @dev: back pointer to the ion_device
+ * @heap: back pointer to the heap the buffer came from
+ * @flags: buffer specific flags
+ * @size: size of the buffer
+ * @priv_virt: private data to the buffer representable as
+ * a void *
+ * @priv_phys: private data to the buffer representable as
+ * an ion_phys_addr_t (and someday a phys_addr_t)
+ * @lock: protects the buffer's cnt fields
+ * @kmap_cnt: number of times the buffer is mapped to the kernel
+ * @vaddr: the kernel mapping if kmap_cnt is not zero
+ * @dmap_cnt: number of times the buffer is mapped for dma
+ * @sg_table: the sg table for the buffer if dmap_cnt is not zero
+*/
struct ion_buffer {
struct kref ref;
struct rb_node node;
struct ion_device *dev;
struct ion_heap *heap;
- struct ion_client *creator;
unsigned long flags;
size_t size;
union {
@@ -70,6 +104,19 @@
int marked;
};
+/**
+ * struct ion_heap_ops - ops to operate on a given heap
+ * @allocate: allocate memory
+ * @free: free memory
+ * @phys get physical address of a buffer (only defined on
+ * physically contiguous heaps)
+ * @map_dma map the memory for dma to a scatterlist
+ * @unmap_dma unmap the memory for dma
+ * @map_kernel map memory to the kernel
+ * @unmap_kernel unmap memory from the kernel
+ * @map_user map memory to userspace
+ * @unmap_user unmap memory from userspace
+ */
struct ion_heap_ops {
int (*allocate) (struct ion_heap *heap,
struct ion_buffer *buffer, unsigned long len,
@@ -102,6 +149,23 @@
int (*unsecure_heap)(struct ion_heap *heap, int version, void *data);
};
+/**
+ * struct ion_heap - represents a heap in the system
+ * @node: rb node to put the heap on the device's tree of heaps
+ * @dev: back pointer to the ion_device
+ * @type: type of heap
+ * @ops: ops struct as above
+ * @id: id of heap, also indicates priority of this heap when
+ * allocating. These are specified by platform data and
+ * MUST be unique
+ * @name: used for debugging
+ * @priv: private heap data
+ *
+ * Represents a pool of memory from which buffers can be made. In some
+ * systems the only heap is regular system memory allocated via vmalloc.
+ * On others, some blocks might require large physically contiguous buffers
+ * that are allocated from a specially reserved heap.
+ */
struct ion_heap {
struct rb_node node;
struct ion_device *dev;
@@ -109,29 +173,58 @@
struct ion_heap_ops *ops;
int id;
const char *name;
+ void *priv;
};
+/**
+ * struct mem_map_data - represents information about the memory map for a heap
+ * @node: rb node used to store in the tree of mem_map_data
+ * @addr: start address of memory region.
+ * @addr_end: end address of memory region.
+ * @size: size of memory region
+ * @client_name: name of the client who owns this buffer.
+ *
+ */
struct mem_map_data {
struct rb_node node;
unsigned long addr;
unsigned long addr_end;
unsigned long size;
const char *client_name;
- const char *creator_name;
};
#define iommu_map_domain(__m) ((__m)->domain_info[1])
#define iommu_map_partition(__m) ((__m)->domain_info[0])
+/**
+ * ion_device_create - allocates and returns an ion device
+ * @custom_ioctl: arch specific ioctl function if applicable
+ *
+ * returns a valid device or an ERR_PTR() encoded error on failure
+ */
struct ion_device *ion_device_create(long (*custom_ioctl)
(struct ion_client *client,
unsigned int cmd,
unsigned long arg));
+/**
+ * ion_device_destroy - frees a device and its resources
+ * @dev: the device
+ */
void ion_device_destroy(struct ion_device *dev);
+/**
+ * ion_device_add_heap - adds a heap to the ion device
+ * @dev: the device
+ * @heap: the heap to add
+ */
void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap);
+/**
+ * Functions for creating and destroying the built-in ion heaps.
+ * Architectures can add their own custom architecture-specific
+ * heaps as appropriate.
+ */
struct ion_heap *ion_heap_create(struct ion_platform_heap *);
void ion_heap_destroy(struct ion_heap *);
@@ -154,21 +247,63 @@
struct ion_heap *ion_reusable_heap_create(struct ion_platform_heap *);
void ion_reusable_heap_destroy(struct ion_heap *);
+/**
+ * kernel api to allocate/free from carveout -- used when carveout is
+ * used to back an architecture specific custom heap
+ */
ion_phys_addr_t ion_carveout_allocate(struct ion_heap *heap, unsigned long size,
unsigned long align);
void ion_carveout_free(struct ion_heap *heap, ion_phys_addr_t addr,
unsigned long size);
+#ifdef CONFIG_CMA
+struct ion_heap *ion_cma_heap_create(struct ion_platform_heap *);
+void ion_cma_heap_destroy(struct ion_heap *);
+#endif
struct ion_heap *msm_get_contiguous_heap(void);
+/**
+ * The carveout/cp heap returns physical addresses; since 0 may be a valid
+ * physical address, this value is used to indicate that allocation failed
+ * (see the example below)
+ */
#define ION_CARVEOUT_ALLOCATE_FAIL -1
#define ION_CP_ALLOCATE_FAIL -1
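+
+/*
+ * Illustrative sketch only: a caller of ion_carveout_allocate() must
+ * compare against ION_CARVEOUT_ALLOCATE_FAIL rather than 0, e.g.:
+ *
+ *	ion_phys_addr_t addr = ion_carveout_allocate(heap, size, align);
+ *	if (addr == ION_CARVEOUT_ALLOCATE_FAIL)
+ *		return -ENOMEM;
+ */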
+/**
+ * The reserved heap returns physical addresses; since 0 may be a valid
+ * physical address, this value is used to indicate that allocation failed
+ */
#define ION_RESERVED_ALLOCATE_FAIL -1
+/**
+ * ion_map_fmem_buffer - map fmem allocated memory into the kernel
+ * @buffer - buffer to map
+ * @phys_base - physical base of the heap
+ * @virt_base - virtual base of the heap
+ * @flags - flags for the heap
+ *
+ * Map fmem allocated memory into the kernel address space. This
+ * is designed to be used by other heaps that need fmem behavior.
+ * The virtual range must be pre-allocated.
+ */
void *ion_map_fmem_buffer(struct ion_buffer *buffer, unsigned long phys_base,
void *virt_base, unsigned long flags);
+/**
+ * ion_do_cache_op - do cache operations.
+ *
+ * @client - pointer to ION client.
+ * @handle - pointer to buffer handle.
+ * @uaddr - virtual address to operate on.
+ * @offset - offset from physical address.
+ * @len - Length of data to do cache operation on.
+ * @cmd - Cache operation to perform:
+ * ION_IOC_CLEAN_CACHES
+ * ION_IOC_INV_CACHES
+ * ION_IOC_CLEAN_INV_CACHES
+ *
+ * Returns 0 on success
+ */
int ion_do_cache_op(struct ion_client *client, struct ion_handle *handle,
void *uaddr, unsigned long offset, unsigned long len,
unsigned int cmd);
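+
+/*
+ * Illustrative usage sketch, assuming `client', `handle' and the user
+ * mapping `vaddr'/`len' were obtained elsewhere:
+ *
+ *	ret = ion_do_cache_op(client, handle, vaddr, 0, len,
+ *			      ION_IOC_CLEAN_CACHES);
+ *	if (ret)
+ *		pr_err("cache clean failed %d\n", ret);
+ */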
@@ -178,4 +313,4 @@
void ion_mem_map_show(struct ion_heap *heap);
-#endif
+#endif /* _ION_PRIV_H */
diff --git a/drivers/gpu/ion/ion_system_heap.c b/drivers/gpu/ion/ion_system_heap.c
index 299c24c..980174e 100644
--- a/drivers/gpu/ion/ion_system_heap.c
+++ b/drivers/gpu/ion/ion_system_heap.c
@@ -2,7 +2,7 @@
* drivers/gpu/ion/ion_system_heap.c
*
* Copyright (C) 2011 Google, Inc.
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This software is licensed under the terms of the GNU General Public
* License version 2, as published by the Free Software Foundation, and
@@ -27,6 +27,7 @@
#include "ion_priv.h"
#include <mach/memory.h>
#include <asm/cacheflush.h>
+#include <linux/msm_ion.h>
static atomic_t system_heap_allocated;
static atomic_t system_contig_heap_allocated;
@@ -206,6 +207,10 @@
for_each_sg(table->sgl, sg, table->nents, i) {
struct page *page = sg_page(sg);
pstart = page_to_phys(page);
+ /*
+ * If page -> phys is returning NULL, something
+ * has really gone wrong...
+ */
if (!pstart) {
WARN(1, "Could not translate virtual address to physical address\n");
return -EINVAL;
@@ -478,7 +483,7 @@
}
page = virt_to_page(buffer->vaddr);
- sglist = vmalloc(sizeof(*sglist));
+ sglist = kmalloc(sizeof(*sglist), GFP_KERNEL);
if (!sglist)
goto out1;
@@ -500,13 +505,13 @@
if (ret)
goto out2;
}
- vfree(sglist);
+ kfree(sglist);
return ret;
out2:
iommu_unmap_range(domain, data->iova_addr, buffer->size);
out1:
- vfree(sglist);
+ kfree(sglist);
msm_free_iova_address(data->iova_addr, domain_num, partition_num,
data->mapped_size);
out:
diff --git a/drivers/gpu/ion/ion_system_mapper.c b/drivers/gpu/ion/ion_system_mapper.c
new file mode 100644
index 0000000..692458e
--- /dev/null
+++ b/drivers/gpu/ion/ion_system_mapper.c
@@ -0,0 +1,114 @@
+/*
+ * drivers/gpu/ion/ion_system_mapper.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/ion.h>
+#include <linux/memory.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include "ion_priv.h"
+/*
+ * This mapper is valid for any heap that allocates memory that already has
+ * a kernel mapping; this includes vmalloc'd memory, kmalloc'd memory,
+ * pages obtained via io_remap, etc.
+ */
+static void *ion_kernel_mapper_map(struct ion_mapper *mapper,
+ struct ion_buffer *buffer,
+ struct ion_mapping **mapping)
+{
+ if (!((1 << buffer->heap->type) & mapper->heap_mask)) {
+ pr_err("%s: attempting to map an unsupported heap\n", __func__);
+ return ERR_PTR(-EINVAL);
+ }
+ /* XXX REVISIT ME!!! */
+ *((unsigned long *)mapping) = (unsigned long)buffer->priv;
+ return buffer->priv;
+}
+
+static void ion_kernel_mapper_unmap(struct ion_mapper *mapper,
+ struct ion_buffer *buffer,
+ struct ion_mapping *mapping)
+{
+ if (!((1 << buffer->heap->type) & mapper->heap_mask))
+ pr_err("%s: attempting to unmap an unsupported heap\n",
+ __func__);
+}
+
+static void *ion_kernel_mapper_map_kernel(struct ion_mapper *mapper,
+ struct ion_buffer *buffer,
+ struct ion_mapping *mapping)
+{
+ if (!((1 << buffer->heap->type) & mapper->heap_mask)) {
+ pr_err("%s: attempting to unmap an unsupported heap\n",
+ __func__);
+ return ERR_PTR(-EINVAL);
+ }
+ return buffer->priv;
+}
+
+static int ion_kernel_mapper_map_user(struct ion_mapper *mapper,
+ struct ion_buffer *buffer,
+ struct vm_area_struct *vma,
+ struct ion_mapping *mapping)
+{
+ int ret;
+
+ switch (buffer->heap->type) {
+ case ION_HEAP_KMALLOC:
+ {
+ unsigned long pfn = __phys_to_pfn(virt_to_phys(buffer->priv));
+ ret = remap_pfn_range(vma, vma->vm_start, pfn + vma->vm_pgoff,
+ vma->vm_end - vma->vm_start,
+ vma->vm_page_prot);
+ break;
+ }
+ case ION_HEAP_VMALLOC:
+ ret = remap_vmalloc_range(vma, buffer->priv, vma->vm_pgoff);
+ break;
+ default:
+ pr_err("%s: attempting to map unsupported heap to userspace\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return ret;
+}
+
+static struct ion_mapper_ops ops = {
+ .map = ion_kernel_mapper_map,
+ .map_kernel = ion_kernel_mapper_map_kernel,
+ .map_user = ion_kernel_mapper_map_user,
+ .unmap = ion_kernel_mapper_unmap,
+};
+
+struct ion_mapper *ion_system_mapper_create(void)
+{
+ struct ion_mapper *mapper;
+ mapper = kzalloc(sizeof(struct ion_mapper), GFP_KERNEL);
+ if (!mapper)
+ return ERR_PTR(-ENOMEM);
+ mapper->type = ION_SYSTEM_MAPPER;
+ mapper->ops = &ops;
+ mapper->heap_mask = (1 << ION_HEAP_VMALLOC) | (1 << ION_HEAP_KMALLOC);
+ return mapper;
+}
+
+void ion_system_mapper_destroy(struct ion_mapper *mapper)
+{
+ kfree(mapper);
+}
+
diff --git a/drivers/gpu/ion/msm/ion_cp_common.c b/drivers/gpu/ion/msm/ion_cp_common.c
index b274ba2..41e0a04 100644
--- a/drivers/gpu/ion/msm/ion_cp_common.c
+++ b/drivers/gpu/ion/msm/ion_cp_common.c
@@ -1,6 +1,6 @@
/*
* Copyright (C) 2011 Google, Inc
- * Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
diff --git a/drivers/gpu/ion/msm/ion_cp_common.h b/drivers/gpu/ion/msm/ion_cp_common.h
index 950966d..eec66e6 100644
--- a/drivers/gpu/ion/msm/ion_cp_common.h
+++ b/drivers/gpu/ion/msm/ion_cp_common.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+ * Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -15,12 +15,24 @@
#define ION_CP_COMMON_H
#include <asm-generic/errno-base.h>
-#include <linux/ion.h>
+#include <linux/msm_ion.h>
#define ION_CP_V1 1
#define ION_CP_V2 2
#if defined(CONFIG_ION_MSM)
+/*
+ * ion_cp2_protect_mem - secures memory via trustzone
+ *
+ * @chunks - physical address of the array containing the chunks to
+ * be locked down
+ * @nchunks - number of entries in the array
+ * @chunk_size - size of each memory chunk
+ * @usage - usage hint
+ * @lock - 1 for lock, 0 for unlock
+ *
+ * return value is the result of the scm call
+ */
int ion_cp_change_chunks_state(unsigned long chunks, unsigned int nchunks,
unsigned int chunk_size, enum cp_mem_usage usage,
int lock);
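+
+/*
+ * Illustrative sketch, assuming `chunks_pa' holds the physical address of
+ * an array describing two 1MB chunks and `usage' is the cp_mem_usage hint
+ * supplied by the caller:
+ *
+ *	ret = ion_cp_change_chunks_state(chunks_pa, 2, SZ_1M, usage, 1);
+ *	if (ret)
+ *		pr_err("failed to lock chunks via TZ: %d\n", ret);
+ */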
diff --git a/drivers/gpu/ion/msm/msm_ion.c b/drivers/gpu/ion/msm/msm_ion.c
index 03a33f3..ab5d09b 100644
--- a/drivers/gpu/ion/msm/msm_ion.c
+++ b/drivers/gpu/ion/msm/msm_ion.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,20 +13,92 @@
#include <linux/export.h>
#include <linux/err.h>
-#include <linux/ion.h>
+#include <linux/msm_ion.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/memory_alloc.h>
#include <linux/fmem.h>
+#include <linux/of.h>
+#include <linux/mm.h>
+#include <linux/mm_types.h>
+#include <linux/sched.h>
+#include <linux/rwsem.h>
+#include <linux/uaccess.h>
#include <mach/ion.h>
#include <mach/msm_memtypes.h>
#include "../ion_priv.h"
#include "ion_cp_common.h"
+#define ION_COMPAT_STR "qcom,msm-ion"
+#define ION_COMPAT_MEM_RESERVE_STR "qcom,msm-ion-reserve"
+
static struct ion_device *idev;
static int num_heaps;
static struct ion_heap **heaps;
+struct ion_heap_desc {
+ unsigned int id;
+ enum ion_heap_type type;
+ const char *name;
+ unsigned int permission_type;
+};
+
+
+static struct ion_heap_desc ion_heap_meta[] = {
+ {
+ .id = ION_SYSTEM_HEAP_ID,
+ .type = ION_HEAP_TYPE_SYSTEM,
+ .name = ION_VMALLOC_HEAP_NAME,
+ },
+ {
+ .id = ION_CP_MM_HEAP_ID,
+ .type = ION_HEAP_TYPE_CP,
+ .name = ION_MM_HEAP_NAME,
+ .permission_type = IPT_TYPE_MM_CARVEOUT,
+ },
+ {
+ .id = ION_MM_FIRMWARE_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_MM_FIRMWARE_HEAP_NAME,
+ },
+ {
+ .id = ION_CP_MFC_HEAP_ID,
+ .type = ION_HEAP_TYPE_CP,
+ .name = ION_MFC_HEAP_NAME,
+ .permission_type = IPT_TYPE_MFC_SHAREDMEM,
+ },
+ {
+ .id = ION_SF_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_SF_HEAP_NAME,
+ },
+ {
+ .id = ION_IOMMU_HEAP_ID,
+ .type = ION_HEAP_TYPE_IOMMU,
+ .name = ION_IOMMU_HEAP_NAME,
+ },
+ {
+ .id = ION_QSECOM_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_QSECOM_HEAP_NAME,
+ },
+ {
+ .id = ION_AUDIO_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_AUDIO_HEAP_NAME,
+ },
+ {
+ .id = ION_CP_WB_HEAP_ID,
+ .type = ION_HEAP_TYPE_CP,
+ .name = ION_WB_HEAP_NAME,
+ },
+ {
+ .id = ION_CAMERA_HEAP_ID,
+ .type = ION_HEAP_TYPE_CARVEOUT,
+ .name = ION_CAMERA_HEAP_NAME,
+ },
+};
+
struct ion_client *msm_ion_client_create(unsigned int heap_mask,
const char *name)
{
@@ -164,6 +236,15 @@
}
}
+/* Fixup heaps in board file to support two heaps being adjacent to each other.
+ * A flag (adjacent_mem_id) in the platform data tells us that the heap's
+ * physical memory location must be adjacent to the specified heap. We do
+ * this by carving out memory for both heaps and then splitting that memory
+ * between the two heaps. The heap specifying "adjacent_mem_id" gets the
+ * base of the memory, while the heap named by "adjacent_mem_id" gets
+ * base+size as its base address.
+ * Note: Modifies platform data and allocates memory.
+ */
static void msm_ion_heap_fixup(struct ion_platform_heap heap_data[],
unsigned int nr_heaps)
{
@@ -183,7 +264,7 @@
if (!heap->base && heap->extra_data) {
unsigned int align = 0;
- switch (heap->type) {
+ switch ((int) heap->type) {
case ION_HEAP_TYPE_CARVEOUT:
align =
((struct ion_co_heap_pdata *) heap->extra_data)->align;
@@ -260,11 +341,339 @@
}
}
+static int msm_init_extra_data(struct ion_platform_heap *heap,
+ const struct ion_heap_desc *heap_desc)
+{
+ int ret = 0;
+
+ switch ((int) heap->type) {
+ case ION_HEAP_TYPE_CP:
+ {
+ heap->extra_data = kzalloc(sizeof(struct ion_cp_heap_pdata),
+ GFP_KERNEL);
+ if (!heap->extra_data) {
+ ret = -ENOMEM;
+ } else {
+ struct ion_cp_heap_pdata *extra = heap->extra_data;
+ extra->permission_type = heap_desc->permission_type;
+ }
+ break;
+ }
+ case ION_HEAP_TYPE_CARVEOUT:
+ {
+ heap->extra_data = kzalloc(sizeof(struct ion_co_heap_pdata),
+ GFP_KERNEL);
+ if (!heap->extra_data)
+ ret = -ENOMEM;
+ break;
+ }
+ default:
+ heap->extra_data = 0;
+ break;
+ }
+ return ret;
+}
+
+static int msm_ion_populate_heap(struct ion_platform_heap *heap)
+{
+ unsigned int i;
+ int ret = -EINVAL;
+ unsigned int len = ARRAY_SIZE(ion_heap_meta);
+ for (i = 0; i < len; ++i) {
+ if (ion_heap_meta[i].id == heap->id) {
+ heap->name = ion_heap_meta[i].name;
+ heap->type = ion_heap_meta[i].type;
+ ret = msm_init_extra_data(heap, &ion_heap_meta[i]);
+ break;
+ }
+ }
+ if (ret)
+ pr_err("%s: Unable to populate heap, error: %d", __func__, ret);
+ return ret;
+}
+
+static void free_pdata(const struct ion_platform_data *pdata)
+{
+ unsigned int i;
+ for (i = 0; i < pdata->nr; ++i)
+ kfree(pdata->heaps[i].extra_data);
+ kfree(pdata);
+}
+
+static int memtype_to_ion_memtype[] = {
+ [MEMTYPE_SMI_KERNEL] = ION_SMI_TYPE,
+ [MEMTYPE_SMI] = ION_SMI_TYPE,
+ [MEMTYPE_EBI0] = ION_EBI_TYPE,
+ [MEMTYPE_EBI1] = ION_EBI_TYPE,
+};
+
+static void msm_ion_get_heap_align(struct device_node *node,
+ struct ion_platform_heap *heap)
+{
+ unsigned int val;
+
+ int ret = of_property_read_u32(node, "qcom,heap-align", &val);
+ if (!ret) {
+ switch ((int) heap->type) {
+ case ION_HEAP_TYPE_CP:
+ {
+ struct ion_cp_heap_pdata *extra =
+ heap->extra_data;
+ extra->align = val;
+ break;
+ }
+ case ION_HEAP_TYPE_CARVEOUT:
+ {
+ struct ion_co_heap_pdata *extra =
+ heap->extra_data;
+ extra->align = val;
+ break;
+ }
+ default:
+ pr_err("ION-heap %s: Cannot specify alignment for this type of heap\n",
+ heap->name);
+ break;
+ }
+ }
+}
+
+static int msm_ion_get_heap_size(struct device_node *node,
+ struct ion_platform_heap *heap)
+{
+ unsigned int val;
+ int ret = 0;
+ const char *memory_name_prop;
+
+ ret = of_property_read_u32(node, "qcom,memory-reservation-size", &val);
+ if (!ret) {
+ heap->size = val;
+ ret = of_property_read_string(node,
+ "qcom,memory-reservation-type",
+ &memory_name_prop);
+
+ if (!ret && memory_name_prop) {
+ val = msm_get_memory_type_from_name(memory_name_prop);
+ if (val < 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+ heap->memory_type = memtype_to_ion_memtype[val];
+ }
+ if (heap->size && (ret || !memory_name_prop)) {
+ pr_err("%s: Need to specify reservation type\n",
+ __func__);
+ ret = -EINVAL;
+ }
+ } else {
+ ret = 0;
+ }
+out:
+ return ret;
+}
+
+
+static void msm_ion_get_heap_adjacent(struct device_node *node,
+ struct ion_platform_heap *heap)
+{
+ unsigned int val;
+ int ret = of_property_read_u32(node, "qcom,heap-adjacent", &val);
+ if (!ret) {
+ switch (heap->type) {
+ case ION_HEAP_TYPE_CARVEOUT:
+ {
+ struct ion_co_heap_pdata *extra = heap->extra_data;
+ extra->adjacent_mem_id = val;
+ break;
+ }
+ default:
+ pr_err("ION-heap %s: Cannot specify adjcent mem id for this type of heap\n",
+ heap->name);
+ break;
+ }
+ } else {
+ switch (heap->type) {
+ case ION_HEAP_TYPE_CARVEOUT:
+ {
+ struct ion_co_heap_pdata *extra = heap->extra_data;
+ extra->adjacent_mem_id = INVALID_HEAP_ID;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+}
+
+static struct ion_platform_data *msm_ion_parse_dt(
+ const struct device_node *dt_node)
+{
+ struct ion_platform_data *pdata = 0;
+ struct device_node *node;
+ uint32_t val = 0;
+ int ret = 0;
+ uint32_t num_heaps = 0;
+ int idx = 0;
+
+ for_each_child_of_node(dt_node, node)
+ num_heaps++;
+
+ if (!num_heaps)
+ return ERR_PTR(-EINVAL);
+
+ pdata = kzalloc(sizeof(struct ion_platform_data) +
+ num_heaps*sizeof(struct ion_platform_heap), GFP_KERNEL);
+ if (!pdata)
+ return ERR_PTR(-ENOMEM);
+
+ pdata->nr = num_heaps;
+
+ for_each_child_of_node(dt_node, node) {
+ /**
+ * TODO: Replace this with of_get_address() when this patch
+ * gets merged: http://
+ * permalink.gmane.org/gmane.linux.drivers.devicetree/18614
+ */
+ ret = of_property_read_u32(node, "reg", &val);
+ if (ret) {
+ pr_err("%s: Unable to find reg key", __func__);
+ goto free_heaps;
+ }
+ pdata->heaps[idx].id = val;
+
+ ret = msm_ion_populate_heap(&pdata->heaps[idx]);
+ if (ret)
+ goto free_heaps;
+
+ msm_ion_get_heap_align(node, &pdata->heaps[idx]);
+
+ ret = msm_ion_get_heap_size(node, &pdata->heaps[idx]);
+ if (ret)
+ goto free_heaps;
+
+ msm_ion_get_heap_adjacent(node, &pdata->heaps[idx]);
+
+ ++idx;
+ }
+ return pdata;
+
+free_heaps:
+ free_pdata(pdata);
+ return ERR_PTR(ret);
+}
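+
+/*
+ * Example of a devicetree fragment that msm_ion_parse_dt() above would
+ * consume (illustrative only -- heap ids, sizes and memory types are
+ * board specific):
+ *
+ *	qcom,ion {
+ *		compatible = "qcom,msm-ion";
+ *
+ *		qcom,ion-heap@8 {
+ *			reg = <8>;
+ *			qcom,heap-align = <0x1000>;
+ *			qcom,memory-reservation-type = "EBI1";
+ *			qcom,memory-reservation-size = <0x780000>;
+ *		};
+ *	};
+ */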
+
+static int check_vaddr_bounds(unsigned long start, unsigned long end)
+{
+ struct mm_struct *mm = current->active_mm;
+ struct vm_area_struct *vma;
+ int ret = 1;
+
+ if (end < start)
+ goto out;
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, start);
+ if (vma && vma->vm_start < end) {
+ if (start < vma->vm_start)
+ goto out_up;
+ if (end > vma->vm_end)
+ goto out_up;
+ ret = 0;
+ }
+
+out_up:
+ up_read(&mm->mmap_sem);
+out:
+ return ret;
+}
+
+static long msm_ion_custom_ioctl(struct ion_client *client,
+ unsigned int cmd,
+ unsigned long arg)
+{
+ switch (cmd) {
+ case ION_IOC_CLEAN_CACHES:
+ case ION_IOC_INV_CACHES:
+ case ION_IOC_CLEAN_INV_CACHES:
+ {
+ struct ion_flush_data data;
+ unsigned long start, end;
+ struct ion_handle *handle = NULL;
+ int ret;
+
+ if (copy_from_user(&data, (void __user *)arg,
+ sizeof(struct ion_flush_data)))
+ return -EFAULT;
+
+ start = (unsigned long) data.vaddr;
+ end = (unsigned long) data.vaddr + data.length;
+
+ if (check_vaddr_bounds(start, end)) {
+ pr_err("%s: virtual address %p is out of bounds\n",
+ __func__, data.vaddr);
+ return -EINVAL;
+ }
+
+ if (!data.handle) {
+ handle = ion_import_dma_buf(client, data.fd);
+ if (IS_ERR(handle)) {
+ pr_info("%s: Could not import handle: %d\n",
+ __func__, (int)handle);
+ return -EINVAL;
+ }
+ }
+
+ ret = ion_do_cache_op(client,
+ data.handle ? data.handle : handle,
+ data.vaddr, data.offset, data.length,
+ cmd);
+
+ if (!data.handle)
+ ion_free(client, handle);
+
+ if (ret < 0)
+ return ret;
+ break;
+
+ }
+ case ION_IOC_GET_FLAGS:
+ {
+ struct ion_flag_data data;
+ int ret;
+ if (copy_from_user(&data, (void __user *)arg,
+ sizeof(struct ion_flag_data)))
+ return -EFAULT;
+
+ ret = ion_handle_get_flags(client, data.handle, &data.flags);
+ if (ret < 0)
+ return ret;
+ if (copy_to_user((void __user *)arg, &data,
+ sizeof(struct ion_flag_data)))
+ return -EFAULT;
+ break;
+ }
+ default:
+ return -ENOTTY;
+ }
+ return 0;
+}
+
static int msm_ion_probe(struct platform_device *pdev)
{
- struct ion_platform_data *pdata = pdev->dev.platform_data;
- int err;
+ struct ion_platform_data *pdata;
+ unsigned int pdata_needs_to_be_freed;
+ int err = -1;
int i;
+ if (pdev->dev.of_node) {
+ pdata = msm_ion_parse_dt(pdev->dev.of_node);
+ if (IS_ERR(pdata)) {
+ err = PTR_ERR(pdata);
+ goto out;
+ }
+ pdata_needs_to_be_freed = 1;
+ } else {
+ pdata = pdev->dev.platform_data;
+ pdata_needs_to_be_freed = 0;
+ }
num_heaps = pdata->nr;
@@ -275,7 +684,7 @@
goto out;
}
- idev = ion_device_create(NULL);
+ idev = ion_device_create(msm_ion_custom_ioctl);
if (IS_ERR_OR_NULL(idev)) {
err = PTR_ERR(idev);
goto freeheaps;
@@ -283,7 +692,7 @@
msm_ion_heap_fixup(pdata->heaps, num_heaps);
-
+ /* create the heaps as specified in the board file */
for (i = 0; i < num_heaps; i++) {
struct ion_platform_heap *heap_data = &pdata->heaps[i];
msm_ion_allocate(heap_data);
@@ -306,6 +715,8 @@
ion_device_add_heap(idev, heaps[i]);
}
+ if (pdata_needs_to_be_freed)
+ free_pdata(pdata);
check_for_heap_overlap(pdata->heaps, num_heaps);
platform_set_drvdata(pdev, idev);
@@ -313,6 +724,8 @@
freeheaps:
kfree(heaps);
+ if (pdata_needs_to_be_freed)
+ free_pdata(pdata);
out:
return err;
}
@@ -330,10 +743,19 @@
return 0;
}
+static struct of_device_id msm_ion_match_table[] = {
+ {.compatible = ION_COMPAT_STR},
+ {},
+};
+EXPORT_COMPAT(ION_COMPAT_MEM_RESERVE_STR);
+
static struct platform_driver msm_ion_driver = {
.probe = msm_ion_probe,
.remove = msm_ion_remove,
- .driver = { .name = "ion-msm" }
+ .driver = {
+ .name = "ion-msm",
+ .of_match_table = msm_ion_match_table,
+ },
};
static int __init msm_ion_init(void)
diff --git a/drivers/gpu/ion/tegra/tegra_ion.c b/drivers/gpu/ion/tegra/tegra_ion.c
new file mode 100644
index 0000000..7af6e16
--- /dev/null
+++ b/drivers/gpu/ion/tegra/tegra_ion.c
@@ -0,0 +1,96 @@
+/*
+ * drivers/gpu/tegra/tegra_ion.c
+ *
+ * Copyright (C) 2011 Google, Inc.
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/ion.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include "../ion_priv.h"
+
+struct ion_device *idev;
+struct ion_mapper *tegra_user_mapper;
+int num_heaps;
+struct ion_heap **heaps;
+
+int tegra_ion_probe(struct platform_device *pdev)
+{
+ struct ion_platform_data *pdata = pdev->dev.platform_data;
+ int err;
+ int i;
+
+ num_heaps = pdata->nr;
+
+ heaps = kzalloc(sizeof(struct ion_heap *) * pdata->nr, GFP_KERNEL);
+
+ idev = ion_device_create(NULL);
+ if (IS_ERR_OR_NULL(idev)) {
+ kfree(heaps);
+ return PTR_ERR(idev);
+ }
+
+ /* create the heaps as specified in the board file */
+ for (i = 0; i < num_heaps; i++) {
+ struct ion_platform_heap *heap_data = &pdata->heaps[i];
+
+ heaps[i] = ion_heap_create(heap_data);
+ if (IS_ERR_OR_NULL(heaps[i])) {
+ err = PTR_ERR(heaps[i]);
+ goto err;
+ }
+ ion_device_add_heap(idev, heaps[i]);
+ }
+ platform_set_drvdata(pdev, idev);
+ return 0;
+err:
+ for (i = 0; i < num_heaps; i++) {
+ if (heaps[i])
+ ion_heap_destroy(heaps[i]);
+ }
+ kfree(heaps);
+ return err;
+}
+
+int tegra_ion_remove(struct platform_device *pdev)
+{
+ struct ion_device *idev = platform_get_drvdata(pdev);
+ int i;
+
+ ion_device_destroy(idev);
+ for (i = 0; i < num_heaps; i++)
+ ion_heap_destroy(heaps[i]);
+ kfree(heaps);
+ return 0;
+}
+
+static struct platform_driver ion_driver = {
+ .probe = tegra_ion_probe,
+ .remove = tegra_ion_remove,
+ .driver = { .name = "ion-tegra" }
+};
+
+static int __init ion_init(void)
+{
+ return platform_driver_register(&ion_driver);
+}
+
+static void __exit ion_exit(void)
+{
+ platform_driver_unregister(&ion_driver);
+}
+
+module_init(ion_init);
+module_exit(ion_exit);
+
diff --git a/drivers/gpu/msm/Kconfig b/drivers/gpu/msm/Kconfig
index 0b293a7..ba63fbc 100644
--- a/drivers/gpu/msm/Kconfig
+++ b/drivers/gpu/msm/Kconfig
@@ -78,13 +78,6 @@
Sets the pagetable size used by the MMU. The max value
is 0xFFF0000 or (256M - 64K).
-config MSM_KGSL_PAGE_TABLE_SIZE_FOR_IOMMU
- hex "Size of pagetables for iommu"
- default 0x1FE00000
- ---help---
- Sets the pagetable size used by the IOMMU. The max value
- is 0x1FE00000 or (512M - 1536K - little interval).
-
config MSM_KGSL_PAGE_TABLE_COUNT
int "Minimum of concurrent pagetables to support"
default 8
@@ -103,21 +96,3 @@
bool "Disable register shadow writes for context switches"
default n
depends on MSM_KGSL
-
-config MSM_KGSL_GPU_USAGE
- bool "Enable sysfs node of GPU usage per process"
- default n
-
-config MSM_KGSL_DEFAULT_GPUMMU
- bool "Prefer gpummu than iommu"
- default n
-
-config MSM_KGSL_GPU_USAGE_SYSTRACE
- bool "Enable kgsl_usage node for ftrace gpu usage event"
- default y
-
-config MSM_KGSL_KILL_HANG_PROCESS
- bool "Enable killing recoverable gpu hang process routine"
- default y
- ---help---
- We only enable this config in CRC branch.
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index 6cdb5f1..fec5363 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -9,7 +9,8 @@
kgsl_mmu.o \
kgsl_gpummu.o \
kgsl_iommu.o \
- kgsl_snapshot.o
+ kgsl_snapshot.o \
+ kgsl_events.o
msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
msm_kgsl_core-$(CONFIG_MSM_KGSL_CFF_DUMP) += kgsl_cffdump.o
@@ -17,6 +18,7 @@
msm_kgsl_core-$(CONFIG_MSM_SCM) += kgsl_pwrscale_trustzone.o
msm_kgsl_core-$(CONFIG_MSM_SLEEP_STATS_DEVICE) += kgsl_pwrscale_idlestats.o
msm_kgsl_core-$(CONFIG_MSM_DCVS) += kgsl_pwrscale_msm.o
+msm_kgsl_core-$(CONFIG_SYNC) += kgsl_sync.o
msm_adreno-y += \
adreno_ringbuffer.o \
@@ -35,6 +37,7 @@
msm_z180-y += \
z180.o \
+ z180_postmortem.o \
z180_trace.o
msm_kgsl_core-objs = $(msm_kgsl_core-y)
diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h
index bde8784..c70c4eb 100644
--- a/drivers/gpu/msm/a2xx_reg.h
+++ b/drivers/gpu/msm/a2xx_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -386,6 +386,7 @@
#define REG_COHER_STATUS_PM4 0xA2B
#define REG_COHER_SIZE_PM4 0xA29
+/* registers added in adreno220 */
#define REG_A220_PC_INDX_OFFSET REG_VGT_INDX_OFFSET
#define REG_A220_PC_VERTEX_REUSE_BLOCK_CNTL REG_VGT_VERTEX_REUSE_BLOCK_CNTL
#define REG_A220_PC_MAX_VTX_INDX REG_VGT_MAX_VTX_INDX
@@ -394,12 +395,14 @@
#define REG_A220_VSC_BIN_SIZE 0x0C01
#define REG_A220_VSC_PIPE_DATA_LENGTH_7 0x0C1D
+/* registers added in adreno225 */
#define REG_A225_RB_COLOR_INFO3 0x2005
#define REG_A225_PC_MULTI_PRIM_IB_RESET_INDX 0x2103
#define REG_A225_GRAS_UCP0X 0x2340
#define REG_A225_GRAS_UCP5W 0x2357
#define REG_A225_GRAS_UCP_ENABLED 0x2360
+/* Debug registers used by snapshot */
#define REG_PA_SU_DEBUG_CNTL 0x0C80
#define REG_PA_SU_DEBUG_DATA 0x0C81
#define REG_RB_DEBUG_CNTL 0x0F26
@@ -432,4 +435,4 @@
#define REG_SQ_DEBUG_MISC_0 0x2309
#define REG_SQ_DEBUG_MISC_1 0x230A
-#endif
+#endif /* __A200_REG_H */
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index 77bd1d0..be9f3ac 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -14,6 +14,7 @@
#ifndef _A300_REG_H
#define _A300_REG_H
+/* Interrupt bit positions within RBBM_INT_0 */
#define A3XX_INT_RBBM_GPU_IDLE 0
#define A3XX_INT_RBBM_AHB_ERROR 1
@@ -40,6 +41,7 @@
#define A3XX_INT_MISC_HANG_DETECT 24
#define A3XX_INT_UCHE_OOB_ACCESS 25
+/* Register definitions */
#define A3XX_RBBM_HW_VERSION 0x000
#define A3XX_RBBM_HW_RELEASE 0x001
@@ -52,6 +54,7 @@
#define A3XX_RBBM_AHB_CMD 0x022
#define A3XX_RBBM_AHB_ERROR_STATUS 0x027
#define A3XX_RBBM_GPR0_CTL 0x02E
+/* This is the same register as on A2XX, just in a different place */
#define A3XX_RBBM_STATUS 0x030
#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33
#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50
@@ -62,17 +65,29 @@
#define A3XX_RBBM_INT_CLEAR_CMD 0x061
#define A3XX_RBBM_INT_0_MASK 0x063
#define A3XX_RBBM_INT_0_STATUS 0x064
+#define A3XX_RBBM_PERFCTR_CTL 0x80
#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
+#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
+#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
+#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
+#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
+#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
#define A3XX_RBBM_RBBM_CTL 0x100
#define A3XX_RBBM_RBBM_CTL 0x100
#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112
+
+/* The following two are the same as on A2XX, just in a different place */
#define A3XX_CP_PFP_UCODE_ADDR 0x1C9
#define A3XX_CP_PFP_UCODE_DATA 0x1CA
#define A3XX_CP_ROQ_ADDR 0x1CC
#define A3XX_CP_ROQ_DATA 0x1CD
+#define A3XX_CP_MERCIU_ADDR 0x1D1
+#define A3XX_CP_MERCIU_DATA 0x1D2
+#define A3XX_CP_MERCIU_DATA2 0x1D3
#define A3XX_CP_MEQ_ADDR 0x1DA
#define A3XX_CP_MEQ_DATA 0x1DB
#define A3XX_CP_HW_FAULT 0x45C
@@ -147,10 +162,15 @@
#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5
#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
+#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
+#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
+#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
+#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
@@ -230,6 +250,7 @@
#define A3XX_TPL1_TP_VS_TEX_OFFSET 0x2340
#define A3XX_TPL1_TP_FS_TEX_OFFSET 0x2342
#define A3XX_TPL1_TP_FS_BORDER_COLOR_BASE_ADDR 0x2343
+#define A3XX_VBIF_CLKON 0x3001
#define A3XX_VBIF_FIXED_SORT_EN 0x300C
#define A3XX_VBIF_FIXED_SORT_SEL0 0x300D
#define A3XX_VBIF_FIXED_SORT_SEL1 0x300E
@@ -244,12 +265,16 @@
#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035
#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036
#define A3XX_VBIF_ARB_CTL 0x303C
+#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049
+#define A3XX_VBIF_OUT_AXI_AMEMTYPE_CONF0 0x3058
#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E
#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
+/* Bit flags for RBBM_CTL */
#define RBBM_RBBM_CTL_RESET_PWR_CTR1 (1 << 1)
#define RBBM_RBBM_CTL_ENABLE_PWR_CTR1 (1 << 17)
+/* Various flags used by the context switch code */
#define SP_MULTI 0
#define SP_BUFFER_MODE 1
@@ -302,6 +327,11 @@
#define UCHE_ENTIRE_CACHE 1
#define UCHE_OP_INVALIDATE 1
+/*
+ * The following are bit field shifts within some of the registers defined
+ * above. These are used in the context switch code in conjunction with the
+ * _SET macro
+ */
#define GRAS_CL_CLIP_CNTL_CLIP_DISABLE 16
#define GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER 12
@@ -476,6 +506,7 @@
#define VPC_VPCVARPSREPLMODE_COMPONENT16 28
#define VPC_VPCVARPSREPLMODE_COMPONENT17 30
+/* RBBM Debug bus block IDs */
#define RBBM_BLOCK_ID_NONE 0x0
#define RBBM_BLOCK_ID_CP 0x1
#define RBBM_BLOCK_ID_RBBM 0x2
@@ -505,6 +536,13 @@
#define RBBM_BLOCK_ID_MARB_2 0x2a
#define RBBM_BLOCK_ID_MARB_3 0x2b
+/* RBBM_CLOCK_CTL default value */
#define A3XX_RBBM_CLOCK_CTL_DEFAULT 0xBFFFFFFF
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define SP_FS_FULL_ALU_INSTRUCTIONS 0x0E
+#define SP_ALU_ACTIVE_CYCLES 0x1D
+#define SP0_ICL1_MISSES 0x1A
+#define SP_FS_CFLOW_INSTRUCTIONS 0x0C
+
#endif
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 3b3fba1..1886e04 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -10,15 +10,20 @@
* GNU General Public License for more details.
*
*/
-#include <linux/delay.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/vmalloc.h>
#include <linux/ioctl.h>
#include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/msm_kgsl.h>
#include <mach/socinfo.h>
-#include <mach/board.h>
+#include <mach/msm_bus_board.h>
+#include <mach/msm_bus.h>
+#include <mach/msm_dcvs.h>
+#include <mach/msm_dcvs_scm.h>
#include "kgsl.h"
#include "kgsl_pwrscale.h"
@@ -28,8 +33,6 @@
#include "adreno.h"
#include "adreno_pm4types.h"
-#include "adreno_debugfs.h"
-#include "adreno_postmortem.h"
#include "a2xx_reg.h"
#include "a3xx_reg.h"
@@ -37,6 +40,7 @@
#define DRIVER_VERSION_MAJOR 3
#define DRIVER_VERSION_MINOR 1
+/* Adreno MH arbiter config */
#define ADRENO_CFG_MHARB \
(0x10 \
| (0 << MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT) \
@@ -69,9 +73,6 @@
| (MMU_CONFIG << MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT))
static const struct kgsl_functable adreno_functable;
-static volatile int adreno_regwrite_footprint = 0;
-static volatile unsigned int *adreno_regwrite_reg;
-static volatile unsigned int adreno_regwrite_val;
static struct adreno_device device_3d0 = {
.dev = {
@@ -80,7 +81,14 @@
.id = KGSL_DEVICE_3D0,
.mh = {
.mharb = ADRENO_CFG_MHARB,
+ /* Remove 1k boundary check in z470 to avoid a GPU
+ * hang. Notice that this solution won't work if
+ * both EBI and SMI are used
+ */
.mh_intf_cfg1 = 0x00032f07,
+ /* turn off the memory protection unit by setting the
+ * acceptable physical address range to include
+ * all pages. */
.mpu_base = 0x00000000,
.mpu_range = 0xFFFFF000,
},
@@ -104,21 +112,43 @@
.gmem_size = SZ_256K,
.pfp_fw = NULL,
.pm4_fw = NULL,
- .wait_timeout = 0,
+ .wait_timeout = 0, /* in milliseconds, 0 means disabled */
.ib_check_level = 0,
};
-unsigned int hang_detect_regs[] = {
+/* This set of registers is used for hang detection.
+ * If the values of these registers remain unchanged across a
+ * KGSL_TIMEOUT_PART interval, a GPU hang is reported in the
+ * kernel log (a rough sketch of the check follows the register
+ * list below).
+ * *****ALERT******ALERT********ALERT*************
+ * The order of the registers below is important; registers
+ * from LONG_IB_DETECT_REG_INDEX_START to
+ * LONG_IB_DETECT_REG_INDEX_END are used in long IB detection.
+ */
+#define LONG_IB_DETECT_REG_INDEX_START 1
+#define LONG_IB_DETECT_REG_INDEX_END 5
+
+unsigned int ft_detect_regs[] = {
A3XX_RBBM_STATUS,
- REG_CP_RB_RPTR,
+ REG_CP_RB_RPTR, /* LONG_IB_DETECT_REG_INDEX_START */
REG_CP_IB1_BASE,
REG_CP_IB1_BUFSZ,
REG_CP_IB2_BASE,
- REG_CP_IB2_BUFSZ,
+ REG_CP_IB2_BUFSZ, /* LONG_IB_DETECT_REG_INDEX_END */
+ 0,
+ 0,
+ 0,
+ 0,
+ 0,
+ 0
};
-const unsigned int hang_detect_regs_count = ARRAY_SIZE(hang_detect_regs);
+const unsigned int ft_detect_regs_count = ARRAY_SIZE(ft_detect_regs);
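+
+/*
+ * Rough sketch of how the registers above are used (the actual check
+ * lives in the fault tolerance code; the local names below are
+ * assumptions): sample each register every KGSL_TIMEOUT_PART and compare
+ * against the previous sample; if nothing moved, report a hang.
+ *
+ *	for (i = 0; i < ft_detect_regs_count; i++) {
+ *		unsigned int val;
+ *		if (!ft_detect_regs[i])
+ *			continue;
+ *		adreno_regread(device, ft_detect_regs[i], &val);
+ *		if (val != prev_reg_val[i])
+ *			regs_changed = 1;
+ *		prev_reg_val[i] = val;
+ *	}
+ *	if (!regs_changed)
+ *		KGSL_DRV_ERR(device, "GPU hang detected\n");
+ */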
+/*
+ * This is the master list of all GPU cores that are supported by this
+ * driver.
+ */
#define ANY_ID (~0)
#define NO_VER (~0)
@@ -131,11 +161,15 @@
struct adreno_gpudev *gpudev;
unsigned int istore_size;
unsigned int pix_shader_start;
-
+ /* Size of an instruction in dwords */
unsigned int instruction_size;
-
+ /* size of gmem for the gpu */
unsigned int gmem_size;
+ /* version of pm4 microcode that supports sync_lock
+ * between CPU and GPU for SMMU-v1 programming */
unsigned int sync_lock_pm4_ver;
+ /* version of pfp microcode that supports sync_lock
+ * between CPU and GPU for SMMU-v1 programming */
unsigned int sync_lock_pfp_ver;
} adreno_gpulist[] = {
{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
@@ -150,6 +184,10 @@
{ ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID,
"leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev,
512, 384, 3, SZ_512K, NO_VER, NO_VER },
+ /*
+ * patchlevel 5 (8960v2) needs special pm4 firmware to work around
+ * a hardware problem.
+ */
{ ADRENO_REV_A225, 2, 2, 0, 5,
"a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
1536, 768, 3, SZ_512K, NO_VER, NO_VER },
@@ -159,26 +197,17 @@
{ ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID,
"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
1536, 768, 3, SZ_512K, 0x225011, 0x225002 },
-
+ /* A3XX doesn't use the pix_shader_start */
{ ADRENO_REV_A305, 3, 0, 5, ANY_ID,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
512, 0, 2, SZ_256K, 0x3FF037, 0x3FF016 },
-
- { ADRENO_REV_A320, 3, 2, 0, ANY_ID,
+ /* A3XX doesn't use the pix_shader_start */
+ { ADRENO_REV_A320, 3, 2, ANY_ID, ANY_ID,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
-};
-
-struct kgsl_process_name {
- char name[TASK_COMM_LEN+1];
-};
-
-static const struct kgsl_process_name kgsl_blocking_process_tbl[] = {
- {"SurfaceFlinger"},
- {"surfaceflinger"},
- {"ndroid.systemui"},
- {"droid.htcdialer"},
- {"mediaserver"},
+ { ADRENO_REV_A330, 3, 3, 0, 0,
+ "a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
+ 512, 0, 2, SZ_1M, NO_VER, NO_VER },
};
static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
@@ -197,7 +226,7 @@
}
}
-
+ /* Reset the time-out in our idle timer */
mod_timer_pending(&device->idle_timer,
jiffies + device->pwrctrl.interval_timeout);
return result;
@@ -269,25 +298,26 @@
unsigned int *cmds = &link[0];
int sizedwords = 0;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- struct kgsl_memdesc **reg_map_desc;
- void *reg_map_array = NULL;
int num_iommu_units, i;
struct kgsl_context *context;
struct adreno_context *adreno_ctx = NULL;
if (!adreno_dev->drawctxt_active)
return kgsl_mmu_device_setstate(&device->mmu, flags);
- num_iommu_units = kgsl_mmu_get_reg_map_desc(&device->mmu,
- ®_map_array);
+ num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
context = idr_find(&device->context_idr, context_id);
+ if (context == NULL)
+ return;
adreno_ctx = context->devctxt;
- reg_map_desc = reg_map_array;
-
if (kgsl_mmu_enable_clk(&device->mmu,
KGSL_IOMMU_CONTEXT_USER))
- goto done;
+ return;
+
+ cmds += __adreno_add_idle_indirect_cmds(cmds,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
if (cpu_is_msm8960())
cmds += adreno_add_change_mh_phys_limit_cmds(cmds, 0xFFFFF000,
@@ -301,46 +331,53 @@
cmds += adreno_add_idle_cmds(adreno_dev, cmds);
-
+ /* Acquire GPU-CPU sync Lock here */
cmds += kgsl_mmu_sync_lock(&device->mmu, cmds);
- pt_val = kgsl_mmu_pt_get_base_addr(device->mmu.hwpagetable);
+ pt_val = kgsl_mmu_get_pt_base_addr(&device->mmu,
+ device->mmu.hwpagetable);
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
+ /*
+ * We need to perform the following operations for all
+ * IOMMU units
+ */
for (i = 0; i < num_iommu_units; i++) {
- reg_pt_val = (pt_val &
- (KGSL_IOMMU_TTBR0_PA_MASK <<
- KGSL_IOMMU_TTBR0_PA_SHIFT)) +
- kgsl_mmu_get_pt_lsb(&device->mmu, i,
- KGSL_IOMMU_CONTEXT_USER);
+ reg_pt_val = (pt_val + kgsl_mmu_get_pt_lsb(&device->mmu,
+ i, KGSL_IOMMU_CONTEXT_USER));
+ /*
+ * Set the address of the new pagetable by writing to the
+ * IOMMU TTBR0 register
+ */
*cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
- *cmds++ = reg_map_desc[i]->gpuaddr +
- (KGSL_IOMMU_CONTEXT_USER <<
- KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_TTBR0;
+ *cmds++ = kgsl_mmu_get_reg_gpuaddr(&device->mmu, i,
+ KGSL_IOMMU_CONTEXT_USER, KGSL_IOMMU_CTX_TTBR0);
*cmds++ = reg_pt_val;
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
+ /*
+ * Read back the ttbr0 register as a barrier to ensure
+ * above writes have completed
+ */
cmds += adreno_add_read_cmds(device, cmds,
- reg_map_desc[i]->gpuaddr +
- (KGSL_IOMMU_CONTEXT_USER <<
- KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_TTBR0,
+ kgsl_mmu_get_reg_gpuaddr(&device->mmu, i,
+ KGSL_IOMMU_CONTEXT_USER, KGSL_IOMMU_CTX_TTBR0),
reg_pt_val,
device->mmu.setstate_memory.gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
}
}
if (flags & KGSL_MMUFLAGS_TLBFLUSH) {
+ /*
+ * tlb flush
+ */
for (i = 0; i < num_iommu_units; i++) {
- reg_pt_val = (pt_val &
- (KGSL_IOMMU_TTBR0_PA_MASK <<
- KGSL_IOMMU_TTBR0_PA_SHIFT)) +
- kgsl_mmu_get_pt_lsb(&device->mmu, i,
- KGSL_IOMMU_CONTEXT_USER);
+ reg_pt_val = (pt_val + kgsl_mmu_get_pt_lsb(&device->mmu,
+ i, KGSL_IOMMU_CONTEXT_USER));
*cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
- *cmds++ = (reg_map_desc[i]->gpuaddr +
- (KGSL_IOMMU_CONTEXT_USER <<
- KGSL_IOMMU_CTX_SHIFT) +
+ *cmds++ = kgsl_mmu_get_reg_gpuaddr(&device->mmu, i,
+ KGSL_IOMMU_CONTEXT_USER,
KGSL_IOMMU_CTX_TLBIALL);
*cmds++ = 1;
@@ -349,21 +386,22 @@
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
cmds += adreno_add_read_cmds(device, cmds,
- reg_map_desc[i]->gpuaddr +
- (KGSL_IOMMU_CONTEXT_USER <<
- KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_TTBR0,
+ kgsl_mmu_get_reg_gpuaddr(&device->mmu, i,
+ KGSL_IOMMU_CONTEXT_USER,
+ KGSL_IOMMU_CTX_TTBR0),
reg_pt_val,
device->mmu.setstate_memory.gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
}
}
-
+ /* Release GPU-CPU sync Lock here */
cmds += kgsl_mmu_sync_unlock(&device->mmu, cmds);
if (cpu_is_msm8960())
cmds += adreno_add_change_mh_phys_limit_cmds(cmds,
- reg_map_desc[num_iommu_units - 1]->gpuaddr - PAGE_SIZE,
+ kgsl_mmu_get_reg_gpuaddr(&device->mmu, 0,
+ 0, KGSL_IOMMU_GLOBAL_BASE),
device->mmu.setstate_memory.gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
else
@@ -376,26 +414,23 @@
sizedwords += (cmds - &link[0]);
if (sizedwords) {
-
+ /* invalidate all base pointers */
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
*cmds++ = 0x7fff;
sizedwords += 2;
- *cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
- *cmds++ = CP_INT_CNTL__RB_INT_MASK;
- sizedwords += 2;
+ /* This returns the per context timestamp but we need to
+ * use the global timestamp for iommu clock disablement */
adreno_ringbuffer_issuecmds(device, adreno_ctx,
KGSL_CMD_FLAGS_PMODE,
&link[0], sizedwords);
kgsl_mmu_disable_clk_on_ts(&device->mmu,
adreno_dev->ringbuffer.timestamp[KGSL_MEMSTORE_GLOBAL], true);
}
+
if (sizedwords > (sizeof(link)/sizeof(unsigned int))) {
KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
BUG();
}
-done:
- if (num_iommu_units)
- kfree(reg_map_array);
}
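
The setstate path above builds its PM4 stream into a local link[] array by advancing a cmds cursor and then derives the submit size from the pointer difference. A standalone sketch of that cursor and size accounting; the packet header encoding here is a stand-in for illustration, not the real PM4 format:

#include <stdio.h>

/* illustrative stand-in for cp_type3_packet(); not the real PM4 encoding */
static unsigned int type3_packet(unsigned int opcode, unsigned int count)
{
	return (3u << 30) | ((count - 1) << 16) | ((opcode & 0xFF) << 8);
}

int main(void)
{
	unsigned int link[32];
	unsigned int *cmds = &link[0];
	int sizedwords;

	*cmds++ = type3_packet(0x26, 1);	/* e.g. a WAIT_FOR_IDLE-style packet */
	*cmds++ = 0x00000000;
	*cmds++ = type3_packet(0x3d, 2);	/* e.g. a MEM_WRITE-style packet */
	*cmds++ = 0x10001000;			/* destination address */
	*cmds++ = 0x00000001;			/* value to write */

	/* pointer difference gives the number of dwords actually built */
	sizedwords = cmds - &link[0];
	printf("built %d of %zu dwords\n",
	       sizedwords, sizeof(link) / sizeof(link[0]));
	return 0;
}
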
static void adreno_gpummu_setstate(struct kgsl_device *device,
@@ -406,24 +441,36 @@
unsigned int link[32];
unsigned int *cmds = &link[0];
int sizedwords = 0;
- unsigned int mh_mmu_invalidate = 0x00000003;
+ unsigned int mh_mmu_invalidate = 0x00000003; /* invalidate all and tc */
struct kgsl_context *context;
struct adreno_context *adreno_ctx = NULL;
+ /*
+ * Fix target freeze issue by adding TLB flush for each submit
+ * on A20X based targets.
+ */
if (adreno_is_a20x(adreno_dev))
flags |= KGSL_MMUFLAGS_TLBFLUSH;
+ /*
+ * If possible, then set the state via the command stream to avoid
+ * a CPU idle. Otherwise, use the default setstate which uses register
+ * writes. For CFF dump we must idle and use the registers so that it is
+ * easier to filter out the mmu accesses from the dump
+ */
if (!kgsl_cff_dump_enable && adreno_dev->drawctxt_active) {
context = idr_find(&device->context_idr, context_id);
+ if (context == NULL)
+ return;
adreno_ctx = context->devctxt;
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
-
+ /* wait for graphics pipe to be idle */
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
-
+ /* set page table base */
*cmds++ = cp_type0_packet(MH_MMU_PT_BASE, 1);
- *cmds++ = kgsl_mmu_pt_get_base_addr(
+ *cmds++ = kgsl_mmu_get_pt_base_addr(&device->mmu,
device->mmu.hwpagetable);
sizedwords += 4;
}
@@ -442,28 +489,40 @@
if (flags & KGSL_MMUFLAGS_PTUPDATE &&
adreno_is_a20x(adreno_dev)) {
+ /* HW workaround: to resolve MMU page fault interrupts
+ * caused by the VGT. It prevents the CP PFP from filling
+ * the VGT DMA request fifo too early, thereby ensuring
+ * that the VGT will not fetch vertex/bin data until
+ * after the page table base register has been updated.
+ *
+ * Two null DRAW_INDX_BIN packets are inserted right
+ * after the page table base update, followed by a
+ * wait for idle. The null packets will fill up the
+ * VGT DMA request fifo and prevent any further
+ * vertex/bin updates from occurring until the wait
+ * has finished. */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = (0x4 << 16) |
(REG_PA_SU_SC_MODE_CNTL - 0x2000);
- *cmds++ = 0;
+ *cmds++ = 0; /* disable faceness generation */
*cmds++ = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1);
*cmds++ = device->mmu.setstate_memory.gpuaddr;
*cmds++ = cp_type3_packet(CP_DRAW_INDX_BIN, 6);
- *cmds++ = 0;
- *cmds++ = 0x0003C004;
- *cmds++ = 0;
- *cmds++ = 3;
+ *cmds++ = 0; /* viz query info */
+ *cmds++ = 0x0003C004; /* draw indicator */
+ *cmds++ = 0; /* bin base */
+ *cmds++ = 3; /* bin size */
*cmds++ =
- device->mmu.setstate_memory.gpuaddr;
- *cmds++ = 6;
+ device->mmu.setstate_memory.gpuaddr; /* dma base */
+ *cmds++ = 6; /* dma size */
*cmds++ = cp_type3_packet(CP_DRAW_INDX_BIN, 6);
- *cmds++ = 0;
- *cmds++ = 0x0003C004;
- *cmds++ = 0;
- *cmds++ = 3;
-
+ *cmds++ = 0; /* viz query info */
+ *cmds++ = 0x0003C004; /* draw indicator */
+ *cmds++ = 0; /* bin base */
+ *cmds++ = 3; /* bin size */
+ /* dma base */
*cmds++ = device->mmu.setstate_memory.gpuaddr;
- *cmds++ = 6;
+ *cmds++ = 6; /* dma size */
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
sizedwords += 21;
@@ -472,7 +531,7 @@
if (flags & (KGSL_MMUFLAGS_PTUPDATE | KGSL_MMUFLAGS_TLBFLUSH)) {
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
- *cmds++ = 0x7fff;
+ *cmds++ = 0x7fff; /* invalidate all base pointers */
sizedwords += 2;
}
@@ -488,7 +547,7 @@
unsigned int context_id,
uint32_t flags)
{
-
+ /* call the mmu specific handler */
if (KGSL_MMU_TYPE_GPU == kgsl_mmu_get_mmutype())
return adreno_gpummu_setstate(device, context_id, flags);
else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
@@ -498,42 +557,16 @@
static unsigned int
a3xx_getchipid(struct kgsl_device *device)
{
- unsigned int majorid = 0, minorid = 0, patchid = 0;
+ struct kgsl_device_platform_data *pdata =
+ kgsl_device_get_drvdata(device);
+ /*
+ * All current A3XX chipids are detected at the SOC level. Leave this
+ * function here to support any future GPUs that have working
+ * chip ID registers
+ */
- unsigned int version = socinfo_get_version();
-
- if (cpu_is_apq8064() || cpu_is_apq8064ab()) {
-
-
- majorid = 2;
- minorid = 0;
-
-
- if (SOCINFO_VERSION_MAJOR(version) == 2) {
- patchid = 2;
- } else {
- if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
- (SOCINFO_VERSION_MINOR(version) == 1))
- patchid = 1;
- else
- patchid = 0;
- }
- } else if (cpu_is_msm8930() || cpu_is_msm8930aa() || cpu_is_msm8627()) {
-
-
- majorid = 0;
- minorid = 5;
-
-
- if ((SOCINFO_VERSION_MAJOR(version) == 1) &&
- (SOCINFO_VERSION_MINOR(version) == 2))
- patchid = 2;
- else
- patchid = 0;
- }
-
- return (0x03 << 24) | (majorid << 16) | (minorid << 8) | patchid;
+ return pdata->chipid;
}
static unsigned int
@@ -541,13 +574,23 @@
{
unsigned int chipid = 0;
unsigned int coreid, majorid, minorid, patchid, revid;
- uint32_t soc_platform_version = socinfo_get_version();
+ struct kgsl_device_platform_data *pdata =
+ kgsl_device_get_drvdata(device);
+
+ /* If the chip id is set at the platform level, then just use that */
+
+ if (pdata->chipid != 0)
+ return pdata->chipid;
adreno_regread(device, REG_RBBM_PERIPHID1, &coreid);
adreno_regread(device, REG_RBBM_PERIPHID2, &majorid);
adreno_regread(device, REG_RBBM_PATCH_RELEASE, &revid);
- if (cpu_is_msm8960() || cpu_is_msm8x60())
+ /*
+ * adreno 22x gpus are indicated by coreid 2,
+ * but REG_RBBM_PERIPHID1 always contains 0 for this field
+ */
+ if (cpu_is_msm8x60())
chipid = 2 << 24;
else
chipid = (coreid & 0xF) << 24;
@@ -558,14 +601,10 @@
patchid = ((revid >> 16) & 0xFF);
-
-
-
+ /* 8x50 returns 0 for patch release, but it should be 1 */
+ /* 8x25 returns 0 for minor id, but it should be 1 */
if (cpu_is_qsd8x50())
patchid = 1;
- else if (cpu_is_msm8960() &&
- SOCINFO_VERSION_MAJOR(soc_platform_version) == 3)
- patchid = 6;
else if (cpu_is_msm8625() && minorid == 0)
minorid = 1;
@@ -577,11 +616,18 @@
static unsigned int
adreno_getchipid(struct kgsl_device *device)
{
- if (cpu_is_apq8064() || cpu_is_apq8064ab() || cpu_is_msm8930() ||
- cpu_is_msm8930aa() || cpu_is_msm8627())
- return a3xx_getchipid(device);
- else
+ struct kgsl_device_platform_data *pdata =
+ kgsl_device_get_drvdata(device);
+
+ /*
+ * All A3XX platforms set pdata->chipid, so assume a zero chipid
+ * means an A2XX processor
+ */
+
+ if (pdata->chipid == 0 || ADRENO_CHIPID_MAJOR(pdata->chipid) == 2)
return a2xx_getchipid(device);
+ else
+ return a3xx_getchipid(device);
}
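
Both chipid helpers above pack core/major/minor/patch into one 32-bit word, and the ADRENO_CHIPID_* accessors used below unpack it again. A hedged standalone sketch of that packing, assuming the byte layout implied by the shifts (core in bits 31:24 down to patch in bits 7:0); the macro names here are local stand-ins:

#include <stdio.h>

/* assumed layout: core[31:24] major[23:16] minor[15:8] patch[7:0] */
#define CHIPID(core, major, minor, patch) \
	(((core) << 24) | ((major) << 16) | ((minor) << 8) | (patch))
#define CHIPID_CORE(id)		(((id) >> 24) & 0xFF)
#define CHIPID_MAJOR(id)	(((id) >> 16) & 0xFF)
#define CHIPID_MINOR(id)	(((id) >> 8) & 0xFF)
#define CHIPID_PATCH(id)	((id) & 0xFF)

int main(void)
{
	unsigned int id = CHIPID(3, 3, 0, 0);	/* an A330-style chip ID */

	printf("chipid=0x%08x core=%u major=%u minor=%u patch=%u\n", id,
	       CHIPID_CORE(id), CHIPID_MAJOR(id),
	       CHIPID_MINOR(id), CHIPID_PATCH(id));
	return 0;
}
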
static inline bool _rev_match(unsigned int id, unsigned int entry)
@@ -596,10 +642,10 @@
adreno_dev->chip_id = adreno_getchipid(&adreno_dev->dev);
- core = (adreno_dev->chip_id >> 24) & 0xff;
- major = (adreno_dev->chip_id >> 16) & 0xff;
- minor = (adreno_dev->chip_id >> 8) & 0xff;
- patchid = (adreno_dev->chip_id & 0xff);
+ core = ADRENO_CHIPID_CORE(adreno_dev->chip_id);
+ major = ADRENO_CHIPID_MAJOR(adreno_dev->chip_id);
+ minor = ADRENO_CHIPID_MINOR(adreno_dev->chip_id);
+ patchid = ADRENO_CHIPID_PATCH(adreno_dev->chip_id);
for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) {
if (core == adreno_gpulist[i].core &&
@@ -626,12 +672,490 @@
}
+static struct platform_device_id adreno_id_table[] = {
+ { DEVICE_3D0_NAME, (kernel_ulong_t)&device_3d0.dev, },
+ {},
+};
+
+MODULE_DEVICE_TABLE(platform, adreno_id_table);
+
+static struct of_device_id adreno_match_table[] = {
+ { .compatible = "qcom,kgsl-3d0", },
+ {}
+};
+
+static inline int adreno_of_read_property(struct device_node *node,
+ const char *prop, unsigned int *ptr)
+{
+ int ret = of_property_read_u32(node, prop, ptr);
+ if (ret)
+ KGSL_CORE_ERR("Unable to read '%s'\n", prop);
+ return ret;
+}
+
+static struct device_node *adreno_of_find_subnode(struct device_node *parent,
+ const char *name)
+{
+ struct device_node *child;
+
+ for_each_child_of_node(parent, child) {
+ if (of_device_is_compatible(child, name))
+ return child;
+ }
+
+ return NULL;
+}
+
+static int adreno_of_get_pwrlevels(struct device_node *parent,
+ struct kgsl_device_platform_data *pdata)
+{
+ struct device_node *node, *child;
+ int ret = -EINVAL;
+
+ node = adreno_of_find_subnode(parent, "qcom,gpu-pwrlevels");
+
+ if (node == NULL) {
+ KGSL_CORE_ERR("Unable to find 'qcom,gpu-pwrlevels'\n");
+ return -EINVAL;
+ }
+
+ pdata->num_levels = 0;
+
+ for_each_child_of_node(node, child) {
+ unsigned int index;
+ struct kgsl_pwrlevel *level;
+
+ if (adreno_of_read_property(child, "reg", &index))
+ goto done;
+
+ if (index >= KGSL_MAX_PWRLEVELS) {
+ KGSL_CORE_ERR("Pwrlevel index %d is out of range\n",
+ index);
+ continue;
+ }
+
+ if (index >= pdata->num_levels)
+ pdata->num_levels = index + 1;
+
+ level = &pdata->pwrlevel[index];
+
+ if (adreno_of_read_property(child, "qcom,gpu-freq",
+ &level->gpu_freq))
+ goto done;
+
+ if (adreno_of_read_property(child, "qcom,bus-freq",
+ &level->bus_freq))
+ goto done;
+
+ if (adreno_of_read_property(child, "qcom,io-fraction",
+ &level->io_fraction))
+ level->io_fraction = 0;
+ }
+
+ if (adreno_of_read_property(parent, "qcom,initial-pwrlevel",
+ &pdata->init_level))
+ pdata->init_level = 1;
+
+ if (pdata->init_level < 0 || pdata->init_level > pdata->num_levels) {
+ KGSL_CORE_ERR("Initial power level out of range\n");
+ pdata->init_level = 1;
+ }
+
+ ret = 0;
+done:
+ return ret;
+
+}
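
adreno_of_get_pwrlevels() above lets device-tree children appear in any order, uses each child's "reg" value as the array index, and grows num_levels to cover the highest index seen. A standalone sketch of that sparse-index fill, with the DT parsing replaced by an already-parsed array; the names and frequencies are illustrative only:

#include <stdio.h>

#define MAX_PWRLEVELS 10

struct pwrlevel { unsigned int gpu_freq, bus_freq; };
struct parsed_child { unsigned int index, gpu_freq, bus_freq; };

int main(void)
{
	/* children may appear out of order in the device tree */
	static const struct parsed_child children[] = {
		{ 2, 128000000, 1 },
		{ 0, 400000000, 3 },
		{ 1, 300000000, 2 },
	};
	struct pwrlevel levels[MAX_PWRLEVELS] = { { 0, 0 } };
	unsigned int num_levels = 0;
	size_t i;

	for (i = 0; i < sizeof(children) / sizeof(children[0]); i++) {
		unsigned int index = children[i].index;

		if (index >= MAX_PWRLEVELS)
			continue;		/* out-of-range entries are skipped */
		if (index >= num_levels)
			num_levels = index + 1;	/* grow to cover this slot */

		levels[index].gpu_freq = children[i].gpu_freq;
		levels[index].bus_freq = children[i].bus_freq;
	}

	printf("num_levels=%u, level0 gpu_freq=%u\n",
	       num_levels, levels[0].gpu_freq);
	return 0;
}
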
+
+static struct msm_dcvs_core_info *adreno_of_get_dcvs(struct device_node *parent)
+{
+ struct device_node *node, *child;
+ struct msm_dcvs_core_info *info = NULL;
+ int count = 0;
+ int ret = -EINVAL;
+
+ node = adreno_of_find_subnode(parent, "qcom,dcvs-core-info");
+ if (node == NULL)
+ return ERR_PTR(-EINVAL);
+
+ info = kzalloc(sizeof(*info), GFP_KERNEL);
+
+ if (info == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*info));
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ for_each_child_of_node(node, child)
+ count++;
+
+ info->power_param.num_freq = count;
+
+ info->freq_tbl = kzalloc(info->power_param.num_freq *
+ sizeof(struct msm_dcvs_freq_entry),
+ GFP_KERNEL);
+
+ if (info->freq_tbl == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n",
+ info->power_param.num_freq *
+ sizeof(struct msm_dcvs_freq_entry));
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ for_each_child_of_node(node, child) {
+ unsigned int index;
+
+ if (adreno_of_read_property(child, "reg", &index))
+ goto err;
+
+ if (index >= info->power_param.num_freq) {
+ KGSL_CORE_ERR("DCVS freq entry %d is out of range\n",
+ index);
+ continue;
+ }
+
+ if (adreno_of_read_property(child, "qcom,freq",
+ &info->freq_tbl[index].freq))
+ goto err;
+
+ if (adreno_of_read_property(child, "qcom,voltage",
+ &info->freq_tbl[index].voltage))
+ info->freq_tbl[index].voltage = 0;
+
+ if (adreno_of_read_property(child, "qcom,is_trans_level",
+ &info->freq_tbl[index].is_trans_level))
+ info->freq_tbl[index].is_trans_level = 0;
+
+ if (adreno_of_read_property(child, "qcom,active-energy-offset",
+ &info->freq_tbl[index].active_energy_offset))
+ info->freq_tbl[index].active_energy_offset = 0;
+
+ if (adreno_of_read_property(child, "qcom,leakage-energy-offset",
+ &info->freq_tbl[index].leakage_energy_offset))
+ info->freq_tbl[index].leakage_energy_offset = 0;
+ }
+
+ if (adreno_of_read_property(node, "qcom,num-cores", &info->num_cores))
+ goto err;
+
+ info->sensors = kzalloc(info->num_cores *
+ sizeof(int),
+ GFP_KERNEL);
+
+ if (info->sensors == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n",
+ info->num_cores * sizeof(int));
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ for (count = 0; count < info->num_cores; count++) {
+ if (adreno_of_read_property(node, "qcom,sensors",
+ &(info->sensors[count])))
+ goto err;
+ }
+
+ if (adreno_of_read_property(node, "qcom,core-core-type",
+ &info->core_param.core_type))
+ goto err;
+
+ if (adreno_of_read_property(node, "qcom,algo-disable-pc-threshold",
+ &info->algo_param.disable_pc_threshold))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-em-win-size-min-us",
+ &info->algo_param.em_win_size_min_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-em-win-size-max-us",
+ &info->algo_param.em_win_size_max_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-em-max-util-pct",
+ &info->algo_param.em_max_util_pct))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-group-id",
+ &info->algo_param.group_id))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-max-freq-chg-time-us",
+ &info->algo_param.max_freq_chg_time_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-slack-mode-dynamic",
+ &info->algo_param.slack_mode_dynamic))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-slack-weight-thresh-pct",
+ &info->algo_param.slack_weight_thresh_pct))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-slack-time-min-us",
+ &info->algo_param.slack_time_min_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-slack-time-max-us",
+ &info->algo_param.slack_time_max_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-ss-win-size-min-us",
+ &info->algo_param.ss_win_size_min_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-ss-win-size-max-us",
+ &info->algo_param.ss_win_size_max_us))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-ss-util-pct",
+ &info->algo_param.ss_util_pct))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,algo-ss-no-corr-below-freq",
+ &info->algo_param.ss_no_corr_below_freq))
+ goto err;
+
+ if (adreno_of_read_property(node, "qcom,energy-active-coeff-a",
+ &info->energy_coeffs.active_coeff_a))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-active-coeff-b",
+ &info->energy_coeffs.active_coeff_b))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-active-coeff-c",
+ &info->energy_coeffs.active_coeff_c))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-leakage-coeff-a",
+ &info->energy_coeffs.leakage_coeff_a))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-leakage-coeff-b",
+ &info->energy_coeffs.leakage_coeff_b))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-leakage-coeff-c",
+ &info->energy_coeffs.leakage_coeff_c))
+ goto err;
+ if (adreno_of_read_property(node, "qcom,energy-leakage-coeff-d",
+ &info->energy_coeffs.leakage_coeff_d))
+ goto err;
+
+ if (adreno_of_read_property(node, "qcom,power-current-temp",
+ &info->power_param.current_temp))
+ goto err;
+
+ return info;
+
+err:
+ if (info) {
+ kfree(info->sensors);
+ kfree(info->freq_tbl);
+ }
+
+ kfree(info);
+
+ return ERR_PTR(ret);
+}
+
+static int adreno_of_get_iommu(struct device_node *parent,
+ struct kgsl_device_platform_data *pdata)
+{
+ struct device_node *node, *child;
+ struct kgsl_device_iommu_data *data = NULL;
+ struct kgsl_iommu_ctx *ctxs = NULL;
+ u32 reg_val[2];
+ int ctx_index = 0;
+
+ node = of_parse_phandle(parent, "iommu", 0);
+ if (node == NULL)
+ return -EINVAL;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (data == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*data));
+ goto err;
+ }
+
+ if (of_property_read_u32_array(node, "reg", reg_val, 2))
+ goto err;
+
+ data->physstart = reg_val[0];
+ data->physend = data->physstart + reg_val[1] - 1;
+
+ data->iommu_ctx_count = 0;
+
+ for_each_child_of_node(node, child)
+ data->iommu_ctx_count++;
+
+ ctxs = kzalloc(data->iommu_ctx_count * sizeof(struct kgsl_iommu_ctx),
+ GFP_KERNEL);
+
+ if (ctxs == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n",
+ data->iommu_ctx_count * sizeof(struct kgsl_iommu_ctx));
+ goto err;
+ }
+
+ for_each_child_of_node(node, child) {
+ int ret = of_property_read_string(child, "label",
+ &ctxs[ctx_index].iommu_ctx_name);
+
+ if (ret) {
+ KGSL_CORE_ERR("Unable to read KGSL IOMMU 'label'\n");
+ goto err;
+ }
+
+ if (adreno_of_read_property(child, "qcom,iommu-ctx-sids",
+ &ctxs[ctx_index].ctx_id))
+ goto err;
+
+ ctx_index++;
+ }
+
+ data->iommu_ctxs = ctxs;
+
+ pdata->iommu_data = data;
+ pdata->iommu_count = 1;
+
+ return 0;
+
+err:
+ kfree(ctxs);
+ kfree(data);
+
+ return -EINVAL;
+}
+
+static int adreno_of_get_pdata(struct platform_device *pdev)
+{
+ struct kgsl_device_platform_data *pdata = NULL;
+ struct kgsl_device *device;
+ int ret = -EINVAL;
+
+ pdev->id_entry = adreno_id_table;
+
+ pdata = pdev->dev.platform_data;
+ if (pdata)
+ return 0;
+
+ if (of_property_read_string(pdev->dev.of_node, "label", &pdev->name)) {
+ KGSL_CORE_ERR("Unable to read 'label'\n");
+ goto err;
+ }
+
+ if (adreno_of_read_property(pdev->dev.of_node, "qcom,id", &pdev->id))
+ goto err;
+
+ pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+ if (pdata == NULL) {
+ KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*pdata));
+ ret = -ENOMEM;
+ goto err;
+ }
+
+ if (adreno_of_read_property(pdev->dev.of_node, "qcom,chipid",
+ &pdata->chipid))
+ goto err;
+
+ /* pwrlevel Data */
+ ret = adreno_of_get_pwrlevels(pdev->dev.of_node, pdata);
+ if (ret)
+ goto err;
+
+ /* Default value is 83, if not found in DT */
+ if (adreno_of_read_property(pdev->dev.of_node, "qcom,idle-timeout",
+ &pdata->idle_timeout))
+ pdata->idle_timeout = 83;
+
+ if (adreno_of_read_property(pdev->dev.of_node, "qcom,nap-allowed",
+ &pdata->nap_allowed))
+ pdata->nap_allowed = 1;
+
+ if (adreno_of_read_property(pdev->dev.of_node, "qcom,clk-map",
+ &pdata->clk_map))
+ goto err;
+
+ device = (struct kgsl_device *)pdev->id_entry->driver_data;
+
+ if (device->id != KGSL_DEVICE_3D0)
+ goto err;
+
+ /* Bus Scale Data */
+
+ pdata->bus_scale_table = msm_bus_cl_get_pdata(pdev);
+ if (IS_ERR_OR_NULL(pdata->bus_scale_table)) {
+ ret = PTR_ERR(pdata->bus_scale_table);
+ goto err;
+ }
+
+ pdata->core_info = adreno_of_get_dcvs(pdev->dev.of_node);
+ if (IS_ERR_OR_NULL(pdata->core_info)) {
+ ret = PTR_ERR(pdata->core_info);
+ goto err;
+ }
+
+ ret = adreno_of_get_iommu(pdev->dev.of_node, pdata);
+ if (ret)
+ goto err;
+
+ pdev->dev.platform_data = pdata;
+ return 0;
+
+err:
+ if (pdata) {
+ if (pdata->core_info)
+ kfree(pdata->core_info->freq_tbl);
+ kfree(pdata->core_info);
+
+ if (pdata->iommu_data)
+ kfree(pdata->iommu_data->iommu_ctxs);
+
+ kfree(pdata->iommu_data);
+ }
+
+ kfree(pdata);
+
+ return ret;
+}
+
+#ifdef CONFIG_MSM_OCMEM
+static int
+adreno_ocmem_gmem_malloc(struct adreno_device *adreno_dev)
+{
+ if (!adreno_is_a330(adreno_dev))
+ return 0;
+
+ /* OCMEM is only needed once, do not support consecutive allocation */
+ if (adreno_dev->ocmem_hdl != NULL)
+ return 0;
+
+ adreno_dev->ocmem_hdl =
+ ocmem_allocate(OCMEM_GRAPHICS, adreno_dev->gmem_size);
+ if (adreno_dev->ocmem_hdl == NULL)
+ return -ENOMEM;
+
+ adreno_dev->gmem_size = adreno_dev->ocmem_hdl->len;
+ adreno_dev->ocmem_base = adreno_dev->ocmem_hdl->addr;
+
+ return 0;
+}
+
+static void
+adreno_ocmem_gmem_free(struct adreno_device *adreno_dev)
+{
+ if (!adreno_is_a330(adreno_dev))
+ return;
+
+ if (adreno_dev->ocmem_hdl == NULL)
+ return;
+
+ ocmem_free(OCMEM_GRAPHICS, adreno_dev->ocmem_hdl);
+ adreno_dev->ocmem_hdl = NULL;
+}
+#else
+static int
+adreno_ocmem_gmem_malloc(struct adreno_device *adreno_dev)
+{
+ return 0;
+}
+
+static void
+adreno_ocmem_gmem_free(struct adreno_device *adreno_dev)
+{
+}
+#endif
+
static int __devinit
adreno_probe(struct platform_device *pdev)
{
struct kgsl_device *device;
struct adreno_device *adreno_dev;
int status = -EINVAL;
+ bool is_dt;
+
+ is_dt = of_match_device(adreno_match_table, &pdev->dev);
+
+ if (is_dt && pdev->dev.of_node) {
+ status = adreno_of_get_pdata(pdev);
+ if (status)
+ goto error_return;
+ }
device = (struct kgsl_device *)pdev->id_entry->driver_data;
adreno_dev = ADRENO_DEVICE(device);
@@ -657,6 +1181,7 @@
adreno_ringbuffer_close(&adreno_dev->ringbuffer);
error:
device->parentdev = NULL;
+error_return:
return status;
}
@@ -682,13 +1207,13 @@
int status = -EINVAL;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- if (KGSL_STATE_DUMP_AND_RECOVER != device->state)
+ if (KGSL_STATE_DUMP_AND_FT != device->state)
kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
-
+ /* Power up the device */
kgsl_pwrctrl_enable(device);
-
+ /* Identify the specific GPU */
adreno_identify_gpu(adreno_dev);
if (adreno_ringbuffer_read_pm4_ucode(device)) {
@@ -710,6 +1235,10 @@
}
+ /*
+ * Check if firmware supports the sync lock PM4 packets needed
+ * for IOMMUv1
+ */
if ((adreno_dev->pm4_fw_version >=
adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pm4_ver) &&
@@ -717,8 +1246,12 @@
adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver))
device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC;
-
+ /* Set up the MMU */
if (adreno_is_a2xx(adreno_dev)) {
+ /*
+ * the MH_CLNT_INTF_CTRL_CONFIG registers aren't present
+ * on older gpus
+ */
if (adreno_is_a20x(adreno_dev)) {
device->mh.mh_intf_cfg1 = 0;
device->mh.mh_intf_cfg2 = 0;
@@ -727,13 +1260,31 @@
kgsl_mh_start(device);
}
- hang_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
+ /* Assign the correct RBBM status register to the fault detect regs */
+ ft_detect_regs[0] = adreno_dev->gpudev->reg_rbbm_status;
+
+ /* Add A3XX specific registers for hang detection */
+ if (adreno_is_a3xx(adreno_dev)) {
+ ft_detect_regs[6] = A3XX_RBBM_PERFCTR_SP_7_LO;
+ ft_detect_regs[7] = A3XX_RBBM_PERFCTR_SP_7_HI;
+ ft_detect_regs[8] = A3XX_RBBM_PERFCTR_SP_6_LO;
+ ft_detect_regs[9] = A3XX_RBBM_PERFCTR_SP_6_HI;
+ ft_detect_regs[10] = A3XX_RBBM_PERFCTR_SP_5_LO;
+ ft_detect_regs[11] = A3XX_RBBM_PERFCTR_SP_5_HI;
+ }
status = kgsl_mmu_start(device);
if (status)
goto error_clk_off;
-
+ status = adreno_ocmem_gmem_malloc(adreno_dev);
+ if (status) {
+ KGSL_DRV_ERR(device, "OCMEM malloc failed\n");
+ goto error_mmu_off;
+ }
+
+ /* Start the GPU */
adreno_dev->gpudev->start(adreno_dev);
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
@@ -741,13 +1292,18 @@
status = adreno_ringbuffer_start(&adreno_dev->ringbuffer, init_ram);
if (status == 0) {
- if (KGSL_STATE_DUMP_AND_RECOVER != device->state)
+ /* While fault tolerance is on we do not want timer to
+ * fire and attempt to change any device state */
+ if (KGSL_STATE_DUMP_AND_FT != device->state)
mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
return 0;
}
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+error_mmu_off:
kgsl_mmu_stop(&device->mmu);
+
error_clk_off:
kgsl_pwrctrl_disable(device);
@@ -762,32 +1318,41 @@
adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
+ kgsl_mmu_stop(&device->mmu);
+
device->ftbl->irqctrl(device, 0);
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
del_timer_sync(&device->idle_timer);
- kgsl_mmu_stop(&device->mmu);
-
+ adreno_ocmem_gmem_free(adreno_dev);
+
+ /* Power down the device */
kgsl_pwrctrl_disable(device);
return 0;
}
static void adreno_mark_context_status(struct kgsl_device *device,
- int recovery_status)
+ int ft_status)
{
struct kgsl_context *context;
int next = 0;
+ /*
+ * Set the reset status of all contexts to
+ * INNOCENT_CONTEXT_RESET_EXT except for the bad context
+ * since that's the guilty party; if fault tolerance failed then
+ * mark all as guilty
+ */
while ((context = idr_get_next(&device->context_idr, &next))) {
struct adreno_context *adreno_context = context->devctxt;
- if (recovery_status) {
+ if (ft_status) {
context->reset_status =
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
} else if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
context->reset_status) {
- if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG ||
- CTXT_FLAGS_GPU_HANG_RECOVERED))
+ if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG |
+ CTXT_FLAGS_GPU_HANG_FT))
context->reset_status =
KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
else
@@ -822,209 +1387,610 @@
}
}
-static void adreno_destroy_recovery_data(struct adreno_recovery_data *rec_data)
+static void adreno_destroy_ft_data(struct adreno_ft_data *ft_data)
{
- vfree(rec_data->rb_buffer);
- vfree(rec_data->bad_rb_buffer);
+ vfree(ft_data->rb_buffer);
+ vfree(ft_data->bad_rb_buffer);
+ vfree(ft_data->good_rb_buffer);
}
-static int adreno_setup_recovery_data(struct kgsl_device *device,
- struct adreno_recovery_data *rec_data)
+static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
+ unsigned int *ptr,
+ bool inc)
+{
+ int status = -EINVAL;
+ unsigned int val1;
+ unsigned int size = rb->buffer_desc.size;
+ unsigned int start_ptr = *ptr;
+
+ while ((start_ptr / sizeof(unsigned int)) != rb->wptr) {
+ if (inc)
+ start_ptr = adreno_ringbuffer_inc_wrapped(start_ptr,
+ size);
+ else
+ start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr,
+ size);
+ kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr);
+ if (KGSL_CMD_IDENTIFIER == val1) {
+ if ((start_ptr / sizeof(unsigned int)) != rb->wptr)
+ start_ptr = adreno_ringbuffer_dec_wrapped(
+ start_ptr, size);
+ *ptr = start_ptr;
+ status = 0;
+ break;
+ }
+ }
+ return status;
+}
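
_find_start_of_cmd_seq() above walks the ring buffer as byte offsets that wrap at the buffer size. A minimal standalone sketch of the wrapped increment/decrement helpers it relies on, assuming adreno_ringbuffer_inc_wrapped()/dec_wrapped() are plain modular arithmetic over the buffer size in bytes:

#include <stdio.h>

/* assumed behaviour of the inc/dec_wrapped helpers: dword-sized steps
 * over byte offsets that wrap around at the ring buffer size */
static unsigned int inc_wrapped(unsigned int ptr, unsigned int size)
{
	return (ptr + sizeof(unsigned int)) % size;
}

static unsigned int dec_wrapped(unsigned int ptr, unsigned int size)
{
	return (ptr + size - sizeof(unsigned int)) % size;
}

int main(void)
{
	unsigned int size = 32 * sizeof(unsigned int);	/* tiny ring for the demo */
	unsigned int ptr = 0;

	ptr = dec_wrapped(ptr, size);	/* stepping back from 0 wraps to the end */
	printf("dec from 0 -> byte offset %u (dword %zu)\n",
	       ptr, ptr / sizeof(unsigned int));

	ptr = inc_wrapped(ptr, size);	/* stepping forward wraps back to 0 */
	printf("inc -> byte offset %u\n", ptr);
	return 0;
}
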
+
+static int _find_cmd_seq_after_eop_ts(struct adreno_ringbuffer *rb,
+ unsigned int *rb_rptr,
+ unsigned int global_eop,
+ bool inc)
+{
+ int status = -EINVAL;
+ unsigned int temp_rb_rptr = *rb_rptr;
+ unsigned int size = rb->buffer_desc.size;
+ unsigned int val[3];
+ int i = 0;
+ bool check = false;
+
+ if (inc && temp_rb_rptr / sizeof(unsigned int) != rb->wptr)
+ return status;
+
+ do {
+ /*
+ * when decrementing we need to decrement first and
+ * then read to make sure we cover all the data
+ */
+ if (!inc)
+ temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
+ temp_rb_rptr, size);
+ kgsl_sharedmem_readl(&rb->buffer_desc, &val[i],
+ temp_rb_rptr);
+
+ if (check && ((inc && val[i] == global_eop) ||
+ (!inc && (val[i] ==
+ cp_type3_packet(CP_MEM_WRITE, 2) ||
+ val[i] == CACHE_FLUSH_TS)))) {
+ /* decrement i, i.e. i = (i - 1 + 3) % 3 if
+ * we are going forward, else increment i */
+ i = (i + 2) % 3;
+ if (val[i] == rb->device->memstore.gpuaddr +
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp)) {
+ int j = ((i + 2) % 3);
+ if ((inc && (val[j] == CACHE_FLUSH_TS ||
+ val[j] == cp_type3_packet(
+ CP_MEM_WRITE, 2))) ||
+ (!inc && val[j] == global_eop)) {
+ /* Found the global eop */
+ status = 0;
+ break;
+ }
+ }
+ /* if no match found then increment i again
+ * since we decremented before matching */
+ i = (i + 1) % 3;
+ }
+ if (inc)
+ temp_rb_rptr = adreno_ringbuffer_inc_wrapped(
+ temp_rb_rptr, size);
+
+ i = (i + 1) % 3;
+ if (2 == i)
+ check = true;
+ } while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr);
+ /* temp_rb_rptr points to the command stream after global eop,
+ * move backward till the start of command sequence */
+ if (!status) {
+ status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false);
+ if (!status) {
+ *rb_rptr = temp_rb_rptr;
+ KGSL_FT_INFO(rb->device,
+ "Offset of cmd sequence after eop timestamp: 0x%x\n",
+ temp_rb_rptr / sizeof(unsigned int));
+ }
+ }
+ if (status)
+ KGSL_FT_ERR(rb->device,
+ "Failed to find the command sequence after eop timestamp\n");
+ return status;
+}
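
_find_cmd_seq_after_eop_ts() above recognizes a multi-dword pattern while scanning a wrapped buffer, keeping only a rolling window of the last words read instead of indexing randomly. A simplified, forward-only standalone sketch of that rolling-window match; the real routine also scans backwards and accepts the words in either order:

#include <stdio.h>
#include <stdbool.h>

/* Scan a wrapped buffer of dwords for the 3-dword sequence
 * {hdr, addr, value}, keeping a rolling window of the last three
 * words read. Returns the dword index where the sequence starts. */
static bool find_seq(const unsigned int *buf, unsigned int ndwords,
		     unsigned int start, unsigned int hdr,
		     unsigned int addr, unsigned int value,
		     unsigned int *found)
{
	unsigned int win[3] = { 0, 0, 0 };
	unsigned int pos = start, read;

	for (read = 0; read < ndwords; read++) {
		win[0] = win[1];
		win[1] = win[2];
		win[2] = buf[pos];

		if (read >= 2 && win[0] == hdr && win[1] == addr &&
		    win[2] == value) {
			*found = (pos + ndwords - 2) % ndwords;
			return true;
		}
		pos = (pos + 1) % ndwords;	/* wrapped increment */
	}
	return false;
}

int main(void)
{
	unsigned int rb[8] = { 1, 2, 0xC0DE, 0xAAAA, 0x5555, 7, 8, 9 };
	unsigned int at;

	if (find_seq(rb, 8, 5, 0xC0DE, 0xAAAA, 0x5555, &at))
		printf("sequence starts at dword %u\n", at);	/* prints 2 */
	return 0;
}
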
+
+static int _find_hanging_ib_sequence(struct adreno_ringbuffer *rb,
+ unsigned int *rb_rptr,
+ unsigned int ib1)
+{
+ int status = -EINVAL;
+ unsigned int temp_rb_rptr = *rb_rptr;
+ unsigned int size = rb->buffer_desc.size;
+ unsigned int val[2];
+ int i = 0;
+ bool check = false;
+ bool ctx_switch = false;
+
+ while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
+ kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
+
+ if (check && val[i] == ib1) {
+ /* decrement i, i.e. i = (i - 1 + 2) % 2 */
+ i = (i + 1) % 2;
+ if (adreno_cmd_is_ib(val[i])) {
+ /* go till start of command sequence */
+ status = _find_start_of_cmd_seq(rb,
+ &temp_rb_rptr, false);
+
+ KGSL_FT_INFO(rb->device,
+ "Found the hanging IB at offset 0x%x\n",
+ temp_rb_rptr / sizeof(unsigned int));
+ break;
+ }
+ /* if no match then increment i since we decremented
+ * before checking */
+ i = (i + 1) % 2;
+ }
+ /* Make sure you do not encounter a context switch twice; we can
+ * encounter it once for the bad context as the start of search
+ * can point to the context switch */
+ if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
+ if (ctx_switch) {
+ KGSL_FT_ERR(rb->device,
+ "Context switch encountered before bad "
+ "IB found\n");
+ break;
+ }
+ ctx_switch = true;
+ }
+ i = (i + 1) % 2;
+ if (1 == i)
+ check = true;
+ temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
+ size);
+ }
+ if (!status)
+ *rb_rptr = temp_rb_rptr;
+ return status;
+}
+
+static int adreno_setup_ft_data(struct kgsl_device *device,
+ struct adreno_ft_data *ft_data)
{
int ret = 0;
- unsigned int ib1_sz, ib2_sz;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
+ struct kgsl_context *context;
+ struct adreno_context *adreno_context;
+ unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
- memset(rec_data, 0, sizeof(*rec_data));
+ memset(ft_data, 0, sizeof(*ft_data));
+ ft_data->start_of_replay_cmds = 0xFFFFFFFF;
+ ft_data->replay_for_snapshot = 0xFFFFFFFF;
- adreno_regread(device, REG_CP_IB1_BUFSZ, &ib1_sz);
- adreno_regread(device, REG_CP_IB2_BUFSZ, &ib2_sz);
- if (ib1_sz || ib2_sz)
- adreno_regread(device, REG_CP_IB1_BASE, &rec_data->ib1);
+ adreno_regread(device, REG_CP_IB1_BASE, &ft_data->ib1);
- kgsl_sharedmem_readl(&device->memstore, &rec_data->context_id,
+ kgsl_sharedmem_readl(&device->memstore, &ft_data->context_id,
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
current_context));
kgsl_sharedmem_readl(&device->memstore,
- &rec_data->global_eop,
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- eoptimestamp));
+ &ft_data->global_eop,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp));
- rec_data->rb_buffer = vmalloc(rb->buffer_desc.size);
- if (!rec_data->rb_buffer) {
+ ft_data->rb_buffer = vmalloc(rb->buffer_desc.size);
+ if (!ft_data->rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
return -ENOMEM;
}
- rec_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
- if (!rec_data->bad_rb_buffer) {
+ ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
+ if (!ft_data->bad_rb_buffer) {
KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
rb->buffer_desc.size);
ret = -ENOMEM;
goto done;
}
+ ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
+ if (!ft_data->good_rb_buffer) {
+ KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
+ rb->buffer_desc.size);
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ ft_data->status = 0;
+
+ /* find the start of bad command sequence in rb */
+ context = idr_find(&device->context_idr, ft_data->context_id);
+ /* Look for the command stream that is right after the global eop */
+
+ if (!context) {
+ /*
+ * If there is no context then fault tolerance does not need to
+ * replay anything, just reset the GPU and that's it
+ */
+ goto done;
+ }
+ ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
+ ft_data->global_eop + 1, false);
+ if (ret)
+ goto done;
+
+ ft_data->start_of_replay_cmds = rb_rptr;
+
+ if (!adreno_dev->ft_policy)
+ adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
+
+ ft_data->ft_policy = adreno_dev->ft_policy;
+
+
+ adreno_context = context->devctxt;
+ if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
+ if (ft_data->ib1) {
+ ret = _find_hanging_ib_sequence(rb,
+ &rb_rptr, ft_data->ib1);
+ if (ret) {
+ KGSL_FT_ERR(device,
+ "Start not found for replay IB sequence\n");
+ ret = 0;
+ goto done;
+ }
+ ft_data->start_of_replay_cmds = rb_rptr;
+ ft_data->replay_for_snapshot = rb_rptr;
+ }
+ }
+
done:
if (ret) {
- vfree(rec_data->rb_buffer);
- vfree(rec_data->bad_rb_buffer);
+ vfree(ft_data->rb_buffer);
+ vfree(ft_data->bad_rb_buffer);
+ vfree(ft_data->good_rb_buffer);
}
return ret;
}
static int
-_adreno_recover_hang(struct kgsl_device *device,
- struct adreno_recovery_data *rec_data,
- bool try_bad_commands)
+_adreno_check_long_ib(struct kgsl_device *device)
{
- int ret;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
- struct kgsl_context *context;
- struct adreno_context *adreno_context = NULL;
- struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
+ unsigned int curr_global_ts = 0;
- context = idr_find(&device->context_idr, rec_data->context_id);
- if (context == NULL) {
- KGSL_DRV_ERR(device, "Last context unknown id:%d\n",
- rec_data->context_id);
+ /* check if the global ts is still the same */
+ kgsl_sharedmem_readl(&device->memstore,
+ &curr_global_ts,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp));
+
+ /* Mark long ib as handled */
+ adreno_dev->long_ib = 0;
+
+ if (curr_global_ts == adreno_dev->long_ib_ts) {
+ KGSL_FT_ERR(device,
+ "IB ran too long, invalidate ctxt\n");
+ return 1;
} else {
- adreno_context = context->devctxt;
- adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
+ /* Do nothing, GPU has gone ahead */
+ KGSL_FT_INFO(device, "false long ib detection return\n");
+ return 0;
+ }
+}
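
_adreno_check_long_ib() above decides whether a long-running-IB report is real by re-reading the global eoptimestamp and comparing it with the value captured when the report was raised; if it has not moved, the GPU really is stuck. A tiny standalone sketch of that check, with the memstore read replaced by a plain field:

#include <stdio.h>
#include <stdbool.h>

struct gpu_state {
	unsigned int long_ib;		/* flag set by the hang detector */
	unsigned int long_ib_ts;	/* eop timestamp captured at flag time */
	unsigned int eop_ts;		/* stand-in for the memstore eop value */
};

/* returns true if the context should be invalidated */
static bool check_long_ib(struct gpu_state *gpu)
{
	unsigned int curr = gpu->eop_ts;	/* in the driver: memstore read */

	gpu->long_ib = 0;			/* mark the report as handled */

	if (curr == gpu->long_ib_ts)
		return true;			/* no progress: really stuck */
	return false;				/* GPU moved on: false alarm */
}

int main(void)
{
	struct gpu_state gpu = { .long_ib = 1, .long_ib_ts = 100, .eop_ts = 100 };

	printf("stuck=%d\n", check_long_ib(&gpu));	/* stuck=1 */
	gpu.eop_ts = 101;				/* GPU retired more work */
	gpu.long_ib = 1;
	printf("stuck=%d\n", check_long_ib(&gpu));	/* stuck=0 */
	return 0;
}
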
+
+static int
+_adreno_ft_restart_device(struct kgsl_device *device,
+ struct kgsl_context *context,
+ struct adreno_ft_data *ft_data)
+{
+
+ struct adreno_context *adreno_context = context->devctxt;
+
+ /* restart device */
+ if (adreno_stop(device)) {
+ KGSL_FT_ERR(device, "Device stop failed\n");
+ return 1;
}
- ret = adreno_ringbuffer_extract(rb, rec_data);
- if (ret)
- goto done;
-
-
- ret = adreno_stop(device);
- if (ret) {
- KGSL_DRV_ERR(device, "Device stop failed in recovery\n");
- goto done;
- }
-
- ret = adreno_start(device, true);
- if (ret) {
- KGSL_DRV_ERR(device, "Device start failed in recovery\n");
- goto done;
+ if (adreno_start(device, true)) {
+ KGSL_FT_ERR(device, "Device start failed\n");
+ return 1;
}
if (context)
kgsl_mmu_setstate(&device->mmu, adreno_context->pagetable,
KGSL_MEMSTORE_GLOBAL);
+ /* If iommu is used then we need to make sure that the iommu clocks
+ * are on since there could be commands in pipeline that touch iommu */
if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
- ret = kgsl_mmu_enable_clk(&device->mmu,
- KGSL_IOMMU_CONTEXT_USER);
- if (ret)
- goto done;
+ if (kgsl_mmu_enable_clk(&device->mmu,
+ KGSL_IOMMU_CONTEXT_USER))
+ return 1;
}
- if (!try_bad_commands)
- rec_data->bad_rb_size = 0;
+ return 0;
+}
- if (rec_data->bad_rb_size) {
- int idle_ret;
- adreno_ringbuffer_restore(rb, rec_data->bad_rb_buffer,
- rec_data->bad_rb_size);
- idle_ret = adreno_idle(device);
- if (idle_ret) {
- ret = adreno_stop(device);
- if (ret) {
- KGSL_DRV_ERR(device,
- "Device stop failed in recovery\n");
- goto done;
- }
- ret = adreno_start(device, true);
- if (ret) {
- KGSL_DRV_ERR(device,
- "Device start failed in recovery\n");
- goto done;
- }
- if (context)
- kgsl_mmu_setstate(&device->mmu,
- adreno_context->pagetable,
- KGSL_MEMSTORE_GLOBAL);
+static inline void
+_adreno_debug_ft_info(struct kgsl_device *device,
+ struct adreno_ft_data *ft_data)
+{
- if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
- ret = kgsl_mmu_enable_clk(&device->mmu,
- KGSL_IOMMU_CONTEXT_USER);
- if (ret)
- goto done;
- }
+ /*
+ * Dumping rb is a very useful tool to debug FT.
+ * It will tell us if we are extracting the rb correctly,
+ * NOP'ing the right IB, skipping the EOF correctly, etc.
+ */
+ if (device->ft_log >= 7) {
- ret = idle_ret;
- KGSL_DRV_ERR(device,
- "Bad context commands hung in recovery\n");
- } else {
- KGSL_DRV_ERR(device,
- "Bad context commands succeeded in recovery\n");
- if (adreno_context)
- adreno_context->flags = (adreno_context->flags &
- ~CTXT_FLAGS_GPU_HANG) |
- CTXT_FLAGS_GPU_HANG_RECOVERED;
- adreno_dev->drawctxt_active = last_active_ctx;
- }
+ /* Print fault tolerance data here */
+ KGSL_FT_INFO(device, "Temp RB buffer size 0x%X\n",
+ ft_data->rb_size);
+ adreno_dump_rb(device, ft_data->rb_buffer,
+ ft_data->rb_size<<2, 0, ft_data->rb_size);
+
+ KGSL_FT_INFO(device, "Bad RB buffer size 0x%X\n",
+ ft_data->bad_rb_size);
+ adreno_dump_rb(device, ft_data->bad_rb_buffer,
+ ft_data->bad_rb_size<<2, 0, ft_data->bad_rb_size);
+
+ KGSL_FT_INFO(device, "Good RB buffer size 0x%X\n",
+ ft_data->good_rb_size);
+ adreno_dump_rb(device, ft_data->good_rb_buffer,
+ ft_data->good_rb_size<<2, 0, ft_data->good_rb_size);
+
}
-
- if (ret || !rec_data->bad_rb_size) {
- adreno_ringbuffer_restore(rb, rec_data->rb_buffer,
- rec_data->rb_size);
+}
+
+static int
+_adreno_ft_resubmit_rb(struct kgsl_device *device,
+ struct adreno_ringbuffer *rb,
+ struct kgsl_context *context,
+ struct adreno_ft_data *ft_data,
+ unsigned int *buff, unsigned int size)
+{
+ unsigned int ret = 0;
+
+ _adreno_debug_ft_info(device, ft_data);
+
+ if (_adreno_ft_restart_device(device, context, ft_data))
+ return 1;
+
+ if (size) {
+
+ /* submit commands and wait for them to pass */
+ adreno_ringbuffer_restore(rb, buff, size);
+
ret = adreno_idle(device);
- if (ret) {
- ret = -EAGAIN;
- goto done;
+ }
+
+ return ret;
+}
+
+
+static int
+_adreno_ft(struct kgsl_device *device,
+ struct adreno_ft_data *ft_data)
+{
+ int ret = 0, i;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
+ struct kgsl_context *context;
+ struct adreno_context *adreno_context = NULL;
+ struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
+
+ context = idr_find(&device->context_idr, ft_data->context_id);
+ if (context == NULL) {
+ KGSL_FT_CRIT(device, "Last context unknown id:%d\n",
+ ft_data->context_id);
+ } else {
+ adreno_context = context->devctxt;
+ adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
+ /*
+ * set the invalid ts flag to 0 for this context since we have
+ * detected a hang for it
+ */
+ context->wait_on_invalid_ts = false;
+
+ /*
+ * This flag will be set by userspace for contexts
+ * that do not want to be fault tolerant (ex: OPENCL)
+ */
+ if (adreno_context->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE) {
+ KGSL_FT_ERR(device,
+ "No FT set for this context play good cmds\n");
+ goto play_good_cmds;
}
- if (rec_data->last_valid_ctx_id) {
- struct kgsl_context *last_ctx =
- idr_find(&device->context_idr,
- rec_data->last_valid_ctx_id);
- if (last_ctx)
- adreno_dev->drawctxt_active = last_ctx->devctxt;
+
+ }
+
+ /*
+ * Extract valid contents from rb which can still be executed after
+ * hang
+ */
+ adreno_ringbuffer_extract(rb, ft_data);
+
+ /* Check if we detected a long running IB,
+ * if true do not attempt replay of bad cmds */
+ if (adreno_dev->long_ib) {
+ if (_adreno_check_long_ib(device)) {
+ ft_data->status = 1;
+ _adreno_debug_ft_info(device, ft_data);
+ goto play_good_cmds;
+ } else {
+ adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
+ return 0;
}
}
+
+ /* Do not try the bad commands if hang is due to a fault */
+ if (device->mmu.fault) {
+ KGSL_FT_ERR(device, "MMU fault skipping bad cmds\n");
+ device->mmu.fault = 0;
+ goto play_good_cmds;
+ }
+
+ if (ft_data->ft_policy & KGSL_FT_DISABLE) {
+ KGSL_FT_ERR(device, "NO FT policy play only good cmds\n");
+ goto play_good_cmds;
+ }
+
+ if (ft_data->ft_policy & KGSL_FT_REPLAY) {
+
+ ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+ ft_data->bad_rb_buffer, ft_data->bad_rb_size);
+
+ if (ret) {
+ KGSL_FT_ERR(device, "Replay unsuccessful\n");
+ ft_data->status = 1;
+ } else
+ goto play_good_cmds;
+ }
+
+ if (ft_data->ft_policy & KGSL_FT_SKIPIB) {
+
+ for (i = 0; i < ft_data->bad_rb_size; i++) {
+ if ((ft_data->bad_rb_buffer[i] ==
+ CP_HDR_INDIRECT_BUFFER_PFD) &&
+ (ft_data->bad_rb_buffer[i+1] == ft_data->ib1)) {
+
+ ft_data->bad_rb_buffer[i] = cp_nop_packet(2);
+ ft_data->bad_rb_buffer[i+1] =
+ KGSL_NOP_IB_IDENTIFIER;
+ ft_data->bad_rb_buffer[i+2] =
+ KGSL_NOP_IB_IDENTIFIER;
+ break;
+ }
+ }
+
+ if ((i == (ft_data->bad_rb_size)) || (!ft_data->ib1)) {
+ KGSL_FT_ERR(device, "Bad IB to NOP not found\n");
+ ft_data->status = 1;
+ goto play_good_cmds;
+ }
+
+ ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+ ft_data->bad_rb_buffer, ft_data->bad_rb_size);
+
+ if (ret) {
+ KGSL_FT_ERR(device, "NOP faulty IB unsuccessful\n");
+ ft_data->status = 1;
+ } else {
+ ft_data->status = 0;
+ goto play_good_cmds;
+ }
+ }
+
+ if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) {
+
+ for (i = 0; i < ft_data->bad_rb_size; i++) {
+ if (ft_data->bad_rb_buffer[i] ==
+ KGSL_END_OF_FRAME_IDENTIFIER) {
+ ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
+ break;
+ }
+ }
+
+ /* EOF not found in RB, discard till EOF in
+ * next IB submission */
+ if (i == ft_data->bad_rb_size) {
+ adreno_context->flags |= CTXT_FLAGS_SKIP_EOF;
+ KGSL_FT_INFO(device,
+ "EOF not found in RB, skip next issueib till EOF\n");
+ ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
+ }
+
+ ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+ ft_data->bad_rb_buffer, ft_data->bad_rb_size);
+
+ if (ret) {
+ KGSL_FT_ERR(device, "Skip EOF unsuccessful\n");
+ ft_data->status = 1;
+ } else {
+ ft_data->status = 0;
+ goto play_good_cmds;
+ }
+ }
+
+play_good_cmds:
+
+ if (ft_data->status)
+ KGSL_FT_ERR(device, "Bad context commands failed\n");
+ else {
+ KGSL_FT_INFO(device, "Bad context commands success\n");
+
+ if (adreno_context) {
+ adreno_context->flags = (adreno_context->flags &
+ ~CTXT_FLAGS_GPU_HANG) | CTXT_FLAGS_GPU_HANG_FT;
+ }
+ adreno_dev->drawctxt_active = last_active_ctx;
+ }
+
+ ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
+ ft_data->good_rb_buffer, ft_data->good_rb_size);
+
+ if (ret) {
+ /* If we fail here we can try to invalidate another
+ * context and try fault tolerance again */
+ ret = -EAGAIN;
+ KGSL_FT_ERR(device, "Playing good commands unsuccessful\n");
+ goto done;
+ } else
+ KGSL_FT_INFO(device, "Playing good commands successful\n");
+
+ /* ringbuffer now has data from the last valid context id,
+ * so restore the active_ctx to the last valid context */
+ if (ft_data->last_valid_ctx_id) {
+ struct kgsl_context *last_ctx =
+ idr_find(&device->context_idr,
+ ft_data->last_valid_ctx_id);
+ if (last_ctx)
+ adreno_dev->drawctxt_active = last_ctx->devctxt;
+ }
+
done:
-
+ /* Turn off iommu clocks */
if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
return ret;
}
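
_adreno_ft() above walks through progressively more drastic recovery attempts according to bits in ft_policy (replay the bad commands, NOP the offending IB, skip to end of frame) before always replaying the known-good commands. A compressed standalone sketch of that cascade; the flag values and the resubmit stub are assumptions for illustration, not the driver's definitions:

#include <stdio.h>
#include <stdbool.h>

/* assumed flag values, for illustration only */
#define FT_DISABLE	(1u << 0)
#define FT_REPLAY	(1u << 1)
#define FT_SKIPIB	(1u << 2)
#define FT_SKIPFRAME	(1u << 3)

/* stand-in for _adreno_ft_resubmit_rb(); returns 0 when the GPU survived */
static int resubmit(const char *what, bool survives)
{
	printf("resubmit: %s -> %s\n", what, survives ? "ok" : "hang");
	return survives ? 0 : 1;
}

static void run_ft(unsigned int policy, bool replay_ok, bool skipib_ok)
{
	if (policy & FT_DISABLE)
		goto play_good_cmds;

	if ((policy & FT_REPLAY) && resubmit("bad cmds as-is", replay_ok) == 0)
		goto play_good_cmds;	/* plain replay fixed it */

	if ((policy & FT_SKIPIB) && resubmit("bad cmds, IB NOP'd", skipib_ok) == 0)
		goto play_good_cmds;

	if (policy & FT_SKIPFRAME)
		(void)resubmit("bad cmds, frame skipped", true);

play_good_cmds:
	resubmit("good cmds", true);
}

int main(void)
{
	run_ft(FT_REPLAY | FT_SKIPIB, false, true);
	return 0;
}
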
static int
-adreno_recover_hang(struct kgsl_device *device,
- struct adreno_recovery_data *rec_data)
+adreno_ft(struct kgsl_device *device,
+ struct adreno_ft_data *ft_data)
{
int ret = 0;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
unsigned int timestamp;
- KGSL_DRV_ERR(device,
- "Starting recovery from 3D GPU hang. Recovery parameters: IB1: 0x%X, "
+ KGSL_FT_INFO(device,
+ "Start Parameters: IB1: 0x%X, "
"Bad context_id: %u, global_eop: 0x%x\n",
- rec_data->ib1, rec_data->context_id, rec_data->global_eop);
+ ft_data->ib1, ft_data->context_id, ft_data->global_eop);
timestamp = rb->timestamp[KGSL_MEMSTORE_GLOBAL];
- KGSL_DRV_ERR(device, "Last issued global timestamp: %x\n", timestamp);
+ KGSL_FT_INFO(device, "Last issued global timestamp: %x\n", timestamp);
+ /* We may need to replay commands multiple times based on whether
+ * multiple contexts hang the GPU */
while (true) {
- if (!ret)
- ret = _adreno_recover_hang(device, rec_data, true);
- else
- ret = _adreno_recover_hang(device, rec_data, false);
+
+ ret = _adreno_ft(device, ft_data);
if (-EAGAIN == ret) {
- adreno_destroy_recovery_data(rec_data);
- adreno_setup_recovery_data(device, rec_data);
- KGSL_DRV_ERR(device,
- "Retry recovery from 3D GPU hang. Recovery parameters: "
+ /* setup new fault tolerance parameters and retry; this
+ * means more than one context is causing the hang */
+ adreno_destroy_ft_data(ft_data);
+ ret = adreno_setup_ft_data(device, ft_data);
+ if (ret)
+ goto done;
+ KGSL_FT_INFO(device,
+ "Retry. Parameters: "
"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
- rec_data->ib1, rec_data->context_id,
- rec_data->global_eop);
+ ft_data->ib1, ft_data->context_id,
+ ft_data->global_eop);
} else {
break;
}
@@ -1033,7 +1999,7 @@
if (ret)
goto done;
-
+ /* Restore correct states after fault tolerance */
if (adreno_dev->drawctxt_active)
device->mmu.hwpagetable =
adreno_dev->drawctxt_active->pagetable;
@@ -1044,115 +2010,88 @@
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
eoptimestamp),
rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
+
+ /* switch to NULL ctxt */
+ if (adreno_dev->drawctxt_active != NULL)
+ adreno_drawctxt_switch(adreno_dev, NULL, 0);
+
done:
adreno_set_max_ts_for_bad_ctxs(device);
adreno_mark_context_status(device, ret);
-
- if (!ret)
- KGSL_DRV_ERR(device, "Recovery succeeded\n");
- else
- KGSL_DRV_ERR(device, "Recovery failed\n");
- return ret;
-}
-
-static int adreno_kill_suspect(struct kgsl_device *device)
-{
- int ret = 1;
-#ifdef CONFIG_MSM_KGSL_KILL_HANG_PROCESS
- int cankill = 1;
- char suspect_task_comm[TASK_COMM_LEN+1];
- char suspect_task_parent_comm[TASK_COMM_LEN+1];
- int suspect_tgid;
- struct task_struct *suspect_task = get_current();
- struct task_struct *suspect_parent_task = suspect_task->group_leader;
- int i = 0;
-
- suspect_tgid = task_tgid_nr(suspect_task);
- get_task_comm(suspect_task_comm, suspect_task);
-
- if (suspect_parent_task)
- get_task_comm(suspect_task_parent_comm, suspect_parent_task);
- else
- suspect_task_parent_comm[0] = '\0';
-
-
-
- for (i = 0; i < ARRAY_SIZE(kgsl_blocking_process_tbl); i++) {
- if (!((strncmp(suspect_task_comm,
- kgsl_blocking_process_tbl[i].name, TASK_COMM_LEN)) &&
- (strncmp(suspect_task_parent_comm,
- kgsl_blocking_process_tbl[i].name, TASK_COMM_LEN)))) {
- cankill=0;
- break;
- }
- }
-
- if (cankill) {
- KGSL_DRV_ERR(device, "We need to kill suspect process "
- "causing gpu hung, tgid=%d, name=%s, pname=%s\n",
- suspect_tgid, suspect_task_comm, suspect_task_parent_comm);
-
- do_send_sig_info(SIGKILL,
- SEND_SIG_FORCED, suspect_task, true);
- ret = 0;
- }
-#endif
+ KGSL_FT_ERR(device, "policy 0x%X status 0x%x\n",
+ ft_data->ft_policy, ret);
return ret;
}
int
-adreno_dump_and_recover(struct kgsl_device *device)
+adreno_dump_and_exec_ft(struct kgsl_device *device)
{
int result = -ETIMEDOUT;
- struct adreno_recovery_data rec_data;
+ struct adreno_ft_data ft_data;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+ unsigned int curr_pwrlevel;
if (device->state == KGSL_STATE_HUNG)
goto done;
- if (device->state == KGSL_STATE_DUMP_AND_RECOVER) {
+ if (device->state == KGSL_STATE_DUMP_AND_FT) {
mutex_unlock(&device->mutex);
- wait_for_completion(&device->recovery_gate);
+ wait_for_completion(&device->ft_gate);
mutex_lock(&device->mutex);
if (device->state != KGSL_STATE_HUNG)
result = 0;
} else {
- kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_RECOVER);
- INIT_COMPLETION(device->recovery_gate);
-
+ kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_FT);
+ INIT_COMPLETION(device->ft_gate);
+ /* Detected a hang */
-
- result = adreno_setup_recovery_data(device, &rec_data);
- adreno_postmortem_dump(device, 0);
+ /* Run fault tolerance at max power level */
+ curr_pwrlevel = pwr->active_pwrlevel;
+ kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel);
- kgsl_device_snapshot(device, 1);
+ /* Get the fault tolerance data as soon as hang is detected */
+ result = adreno_setup_ft_data(device, &ft_data);
- result = adreno_recover_hang(device, &rec_data);
- adreno_destroy_recovery_data(&rec_data);
+ /*
+ * If long ib is detected, do not attempt postmortem or
+ * snapshot, if GPU is still executing commands
+ * we will get errors
+ */
+ if (!adreno_dev->long_ib) {
+ /*
+ * Trigger an automatic dump of the state to
+ * the console
+ */
+ kgsl_postmortem_dump(device, 0);
+
+ /*
+ * Make a GPU snapshot. For now, do it after the
+ * PM dump so we can at least be sure the PM dump
+ * will work as it always has
+ */
+ kgsl_device_snapshot(device, 1);
+ }
+
+ if (!result) {
+ result = adreno_ft(device, &ft_data);
+ adreno_destroy_ft_data(&ft_data);
+ }
+
+ /* restore power level */
+ kgsl_pwrctrl_pwrlevel_change(device, curr_pwrlevel);
+
if (result) {
kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
} else {
kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
mod_timer(&device->idle_timer, jiffies + FIRST_TIMEOUT);
}
- complete_all(&device->recovery_gate);
-
-
- if (!device->snapshot_no_panic) {
- if (result) {
- msleep(10000);
- panic("GPU Hang");
- } else {
- if (board_mfg_mode() ||
- adreno_kill_suspect(device)) {
- msleep(10000);
- panic("Recoverable GPU Hang");
- }
- }
- }
+ complete_all(&device->ft_gate);
}
done:
return result;
}
-EXPORT_SYMBOL(adreno_dump_and_recover);
+EXPORT_SYMBOL(adreno_dump_and_exec_ft);
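
adreno_dump_and_exec_ft() above lets only one caller run fault tolerance: the first one moves the device to DUMP_AND_FT and re-arms ft_gate, while later callers drop the mutex and sleep on the gate until complete_all() releases them, then re-check the device state. A userspace analogy of that gate pattern using pthreads (the kernel code uses struct completion; this is only an illustration of the pattern, not the driver's code):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t dev_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t ft_gate = PTHREAD_COND_INITIALIZER;
static bool ft_in_progress;
static bool device_hung = true;

static void *dump_and_exec_ft(void *arg)
{
	pthread_mutex_lock(&dev_mutex);
	if (ft_in_progress) {
		/* someone else is already recovering: wait on the gate */
		while (ft_in_progress)
			pthread_cond_wait(&ft_gate, &dev_mutex);
		printf("%s: waited, hung=%d\n", (char *)arg, device_hung);
	} else {
		ft_in_progress = true;
		pthread_mutex_unlock(&dev_mutex);

		/* ... run fault tolerance without holding the mutex ... */

		pthread_mutex_lock(&dev_mutex);
		device_hung = false;			/* recovery succeeded */
		ft_in_progress = false;
		pthread_cond_broadcast(&ft_gate);	/* like complete_all() */
		printf("%s: recovered\n", (char *)arg);
	}
	pthread_mutex_unlock(&dev_mutex);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, dump_and_exec_ft, "thread A");
	pthread_create(&b, NULL, dump_and_exec_ft, "thread B");
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}
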
static int adreno_getproperty(struct kgsl_device *device,
enum kgsl_property_type type,
@@ -1198,8 +2137,13 @@
}
memset(&shadowprop, 0, sizeof(shadowprop));
if (device->memstore.hostptr) {
- shadowprop.gpuaddr = device->memstore.physaddr;
+ /* NOTE: with mmu enabled, gpuaddr doesn't mean
+ * anything to mmap().
+ */
+ shadowprop.gpuaddr = device->memstore.gpuaddr;
shadowprop.size = device->memstore.size;
+ /* GSL needs this to be set, even if it
+ appears to be meaningless */
shadowprop.flags = KGSL_FLAGS_INITIALIZED |
KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
}
@@ -1253,6 +2197,7 @@
unsigned int sizebytes)
{
int status = -EINVAL;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
switch (type) {
case KGSL_PROP_PWRCTRL: {
@@ -1272,16 +2217,50 @@
if (enable) {
if (pdata->nap_allowed)
device->pwrctrl.nap_allowed = true;
-
+ adreno_dev->fast_hang_detect = 1;
kgsl_pwrscale_enable(device);
} else {
device->pwrctrl.nap_allowed = false;
+ adreno_dev->fast_hang_detect = 0;
kgsl_pwrscale_disable(device);
}
status = 0;
}
break;
+ case KGSL_PROP_FAULT_TOLERANCE: {
+ struct kgsl_ft_config ftd;
+
+ if (adreno_dev->ft_user_control == 0)
+ break;
+
+ if (sizebytes != sizeof(ftd))
+ break;
+
+ if (copy_from_user(&ftd, (void __user *) value,
+ sizeof(ftd))) {
+ status = -EFAULT;
+ break;
+ }
+
+ if (ftd.ft_policy)
+ adreno_dev->ft_policy = ftd.ft_policy;
+ else
+ adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
+
+ if (ftd.ft_pf_policy)
+ adreno_dev->ft_pf_policy = ftd.ft_pf_policy;
+ else
+ adreno_dev->ft_pf_policy =
+ KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
+
+ if (ftd.ft_pm_dump)
+ device->pm_dump_enable = 1;
+ else
+ device->pm_dump_enable = 0;
+
+ }
+ break;
default:
break;
}
@@ -1289,12 +2268,6 @@
return status;
}
-static inline void adreno_poke(struct kgsl_device *device)
-{
- struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- adreno_regwrite(device, REG_CP_RB_WPTR, adreno_dev->ringbuffer.wptr);
-}
-
static int adreno_ringbuffer_drain(struct kgsl_device *device,
unsigned int *regs)
{
@@ -1306,17 +2279,19 @@
if (!(rb->flags & KGSL_FLAGS_STARTED))
return 0;
+ /*
+ * Wait for 100 msecs before the first hang check to give the
+ * hardware time to update, then check for a hang periodically
+ * every KGSL_TIMEOUT_PART msecs until the timeout expires
+ */
wait = jiffies + msecs_to_jiffies(100);
- adreno_poke(device);
-
do {
if (time_after(jiffies, wait)) {
- adreno_poke(device);
-
-
- if (adreno_hang_detect(device, regs))
+ /* Check to see if the core is hung */
+ if (adreno_ft_detect(device, regs))
return -ETIMEDOUT;
wait = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
@@ -1333,13 +2308,14 @@
return 0;
}
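
adreno_ringbuffer_drain() above waits 100 msecs before its first hang check and then re-checks every KGSL_TIMEOUT_PART msecs while polling for the ring to empty. A standalone sketch of that deadline bookkeeping, with a monotonic clock standing in for jiffies; the durations and the "drained" condition are illustrative, and the loop busy-polls just as the driver does:

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

static long now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ts.tv_sec * 1000L + ts.tv_nsec / 1000000L;
}

int main(void)
{
	const long check_period_ms = 50;	/* stand-in for KGSL_TIMEOUT_PART */
	const long total_timeout_ms = 400;	/* stand-in for the drain timeout */
	long start = now_ms();
	long next_check = start + 100;		/* first hang check after 100 ms */
	bool drained = false;

	while (!drained) {
		if (now_ms() - start > total_timeout_ms) {
			printf("timed out\n");
			return 1;
		}
		if (now_ms() > next_check) {
			/* in the driver: adreno_ft_detect(device, regs) */
			printf("hang check at +%ld ms\n", now_ms() - start);
			next_check = now_ms() + check_period_ms;
		}
		/* in the driver: compare rb->rptr against rb->wptr */
		drained = (now_ms() - start > 250);	/* pretend it drains */
	}
	printf("drained\n");
	return 0;
}
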
+/* Caller must hold the device mutex. */
int adreno_idle(struct kgsl_device *device)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
unsigned int rbbm_status;
unsigned long wait_time;
unsigned long wait_time_part;
- unsigned int prev_reg_val[hang_detect_regs_count];
+ unsigned int prev_reg_val[ft_detect_regs_count];
memset(prev_reg_val, 0, sizeof(prev_reg_val));
@@ -1348,12 +2324,12 @@
0x00000000, 0x80000000);
retry:
-
+ /* First, wait for the ringbuffer to drain */
if (adreno_ringbuffer_drain(device, prev_reg_val))
goto err;
-
- wait_time = jiffies + ADRENO_IDLE_TIMEOUT;
+ /* now, wait for the GPU to finish its operations */
+ wait_time = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
while (time_before(jiffies, wait_time)) {
@@ -1367,10 +2343,12 @@
return 0;
}
+ /* Don't wait for the timeout, detect the hang faster. */
if (time_after(jiffies, wait_time_part)) {
wait_time_part = jiffies +
msecs_to_jiffies(KGSL_TIMEOUT_PART);
- if ((adreno_hang_detect(device, prev_reg_val)))
+ if ((adreno_ft_detect(device, prev_reg_val)))
goto err;
}
@@ -1378,24 +2356,32 @@
err:
KGSL_DRV_ERR(device, "spun too long waiting for RB to idle\n");
- if (KGSL_STATE_DUMP_AND_RECOVER != device->state &&
- !adreno_dump_and_recover(device)) {
+ if (KGSL_STATE_DUMP_AND_FT != device->state &&
+ !adreno_dump_and_exec_ft(device)) {
wait_time = jiffies + ADRENO_IDLE_TIMEOUT;
goto retry;
}
return -ETIMEDOUT;
}
+/**
+ * is_adreno_rbbm_status_idle - Check if the GPU core is idle by probing
+ * the rbbm_status register
+ * @device: Pointer to the GPU device whose idle status is to be checked
+ *
+ * Returns whether the core is idle (based on rbbm_status):
+ * false if the core is active, true if the core is idle
+ */
static bool is_adreno_rbbm_status_idle(struct kgsl_device *device)
{
unsigned int reg_rbbm_status;
bool status = false;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
+ /* Is the core idle? */
adreno_regread(device,
- adreno_dev->gpudev->reg_rbbm_status,
- ®_rbbm_status);
+ adreno_dev->gpudev->reg_rbbm_status,
+ ®_rbbm_status);
if (adreno_is_a2xx(adreno_dev)) {
if (reg_rbbm_status == 0x110)
@@ -1414,13 +2400,22 @@
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
WARN_ON(device->state == KGSL_STATE_INIT);
-
+ /* If the device isn't active, don't force it on. */
if (device->state == KGSL_STATE_ACTIVE) {
-
+ /* Is the ring buffer is empty? */
GSL_RB_GET_READPTR(rb, &rb->rptr);
if (!device->active_cnt && (rb->rptr == rb->wptr)) {
-
- status = is_adreno_rbbm_status_idle(device);
+ /*
+ * Are there interrupts pending? If so then pretend we
+ * are not idle - this avoids the possibility that we go
+ * to a lower power state without handling interrupts
+ * first.
+ */
+
+ if (!adreno_dev->gpudev->irq_pending(adreno_dev)) {
+ /* Is the core idle? */
+ status = is_adreno_rbbm_status_idle(device);
+ }
}
} else {
status = true;
@@ -1428,16 +2423,14 @@
return status;
}
+/* Caller must hold the device mutex. */
static int adreno_suspend_context(struct kgsl_device *device)
{
int status = 0;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
+ /* switch to NULL ctxt */
if (adreno_dev->drawctxt_active != NULL) {
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- device->current_process_priv = NULL;
-#endif
adreno_drawctxt_switch(adreno_dev, NULL, 0);
status = adreno_idle(device);
}
@@ -1445,6 +2438,7 @@
return status;
}
+/* Find a memory structure attached to an adreno context */
struct kgsl_memdesc *adreno_find_ctxtmem(struct kgsl_device *device,
unsigned int pt_base, unsigned int gpuaddr, unsigned int size)
@@ -1460,7 +2454,8 @@
adreno_context = (struct adreno_context *)context->devctxt;
- if (kgsl_mmu_pt_equal(adreno_context->pagetable, pt_base)) {
+ if (kgsl_mmu_pt_equal(&device->mmu, adreno_context->pagetable,
+ pt_base)) {
struct kgsl_memdesc *desc;
desc = &adreno_context->gpustate;
@@ -1499,7 +2494,7 @@
size))
return &device->mmu.setstate_memory;
- entry = kgsl_get_mem_entry(pt_base, gpuaddr, size);
+ entry = kgsl_get_mem_entry(device, pt_base, gpuaddr, size);
if (entry)
return &entry->memdesc;
@@ -1527,6 +2522,8 @@
if (!in_interrupt())
kgsl_pre_hwaccess(device);
+ /* Ensure this read finishes before the next one,
+ * i.e. act like a normal readl(). */
*value = __raw_readl(reg);
rmb();
}
@@ -1544,14 +2541,10 @@
kgsl_cffdump_regwrite(device->id, offsetwords << 2, value);
reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
+ /* Ensure previous writes post before this one,
+ * i.e. act like a normal writel(). */
wmb();
- adreno_regwrite_footprint = 1;
- adreno_regwrite_reg = reg;
- adreno_regwrite_val = value;
- dsb();
__raw_writel(value, reg);
- adreno_regwrite_footprint = 0;
- dsb();
}
static unsigned int _get_context_id(struct kgsl_context *k_ctxt)
@@ -1568,66 +2561,99 @@
return context_id;
}
-static int kgsl_check_interrupt_timestamp(struct kgsl_device *device,
+static unsigned int adreno_check_hw_ts(struct kgsl_device *device,
struct kgsl_context *context, unsigned int timestamp)
{
- int status;
+ int status = 0;
unsigned int ref_ts, enableflag;
- unsigned int context_id;
- struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ unsigned int context_id = _get_context_id(context);
- mutex_lock(&device->mutex);
- context_id = _get_context_id(context);
+ /*
+ * If the context ID is invalid, we are in a race with
+ * the context being destroyed by userspace so bail.
+ */
if (context_id == KGSL_CONTEXT_INVALID) {
KGSL_DRV_WARN(device, "context was detached");
- status = -EINVAL;
- goto unlock;
+ return -EINVAL;
}
status = kgsl_check_timestamp(device, context, timestamp);
- if (!status) {
- kgsl_sharedmem_readl(&device->memstore, &enableflag,
- KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable));
- mb();
+ if (status)
+ return status;
- if (enableflag) {
- kgsl_sharedmem_readl(&device->memstore, &ref_ts,
+ kgsl_sharedmem_readl(&device->memstore, &enableflag,
+ KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable));
+ /*
+ * Barrier is needed here to make sure the read from memstore
+ * has posted
+ */
+
+ mb();
+
+ if (enableflag) {
+ kgsl_sharedmem_readl(&device->memstore, &ref_ts,
KGSL_MEMSTORE_OFFSET(context_id,
ref_wait_ts));
- mb();
- if (timestamp_cmp(ref_ts, timestamp) >= 0) {
- kgsl_sharedmem_writel(&device->memstore,
+
+ /* Make sure the memstore read has posted */
+ mb();
+ if (timestamp_cmp(ref_ts, timestamp) >= 0) {
+ kgsl_sharedmem_writel(&device->memstore,
+ KGSL_MEMSTORE_OFFSET(context_id,
+ ref_wait_ts), timestamp);
+ /* Make sure the memstore write is posted */
+ wmb();
+ }
+ } else {
+ kgsl_sharedmem_writel(&device->memstore,
KGSL_MEMSTORE_OFFSET(context_id,
ref_wait_ts), timestamp);
- wmb();
- }
- } else {
- unsigned int cmds[2];
- kgsl_sharedmem_writel(&device->memstore,
- KGSL_MEMSTORE_OFFSET(context_id,
- ref_wait_ts), timestamp);
- enableflag = 1;
- kgsl_sharedmem_writel(&device->memstore,
+ enableflag = 1;
+ kgsl_sharedmem_writel(&device->memstore,
KGSL_MEMSTORE_OFFSET(context_id,
ts_cmp_enable), enableflag);
- wmb();
- cmds[0] = cp_type3_packet(CP_NOP, 1);
- cmds[1] = 0;
+ /* Make sure the memstore write gets posted */
+ wmb();
- if (adreno_dev->drawctxt_active)
- adreno_ringbuffer_issuecmds(device,
- adreno_dev->drawctxt_active,
- KGSL_CMD_FLAGS_NONE, &cmds[0], 2);
- else
- BUG();
- }
+ /*
+ * Submit a dummy packet so that even if all
+ * commands up to the timestamp get executed we will still
+ * get an interrupt
+ */
+
+ if (context && device->state != KGSL_STATE_SLUMBER)
+ adreno_ringbuffer_issuecmds(device, context->devctxt,
+ KGSL_CMD_FLAGS_NONE, NULL, 0);
}
-unlock:
+
+ return 0;
+}
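adreno_check_hw_ts() above leans on timestamp_cmp() to decide whether ref_wait_ts already covers the requested timestamp. A minimal sketch of a wraparound-safe comparison, assuming the shared KGSL helper follows the common signed-difference idiom (an assumption; the real helper is defined elsewhere in kgsl and is not part of this hunk):

    /*
     * Illustrative sketch only, not part of this patch: compare two
     * 32-bit timestamps that are allowed to wrap. Returns >0 if a is
     * newer than b, 0 if equal, <0 if a is older than b.
     */
    static inline int example_timestamp_cmp(unsigned int a, unsigned int b)
    {
            return (int)(a - b);
    }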
+
+/* Return 1 if the event timestamp has already passed, 0 if it was marked */
+static int adreno_next_event(struct kgsl_device *device,
+ struct kgsl_event *event)
+{
+ return adreno_check_hw_ts(device, event->context, event->timestamp);
+}
+
+static int adreno_check_interrupt_timestamp(struct kgsl_device *device,
+ struct kgsl_context *context, unsigned int timestamp)
+{
+ int status;
+
+ mutex_lock(&device->mutex);
+ status = adreno_check_hw_ts(device, context, timestamp);
mutex_unlock(&device->mutex);
return status;
}
+/*
+ wait_event_interruptible_timeout checks for the exit condition before
+ placing a process on the wait queue. For conditional interrupts we expect
+ the process to already be on its wait queue when its exit-condition
+ checking function is called.
+*/
#define kgsl_wait_event_interruptible_timeout(wq, condition, timeout, io)\
({ \
long __ret = timeout; \
@@ -1640,134 +2666,405 @@
-unsigned int adreno_hang_detect(struct kgsl_device *device,
+unsigned int adreno_ft_detect(struct kgsl_device *device,
unsigned int *prev_reg_val)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- unsigned int curr_reg_val[hang_detect_regs_count];
- unsigned int hang_detected = 1;
+ unsigned int curr_reg_val[ft_detect_regs_count];
+ unsigned int fast_hang_detected = 1;
+ unsigned int long_ib_detected = 1;
unsigned int i;
+ static unsigned long next_hang_detect_time;
+ static unsigned int prev_global_ts;
+ unsigned int curr_global_ts = 0;
+ unsigned int curr_context_id = 0;
+ static struct adreno_context *curr_context;
+ static struct kgsl_context *context;
if (!adreno_dev->fast_hang_detect)
- return 0;
+ fast_hang_detected = 0;
- if (is_adreno_rbbm_status_idle(device))
- return 0;
+ if (!adreno_dev->long_ib_detect)
+ long_ib_detected = 0;
- for (i = 0; i < hang_detect_regs_count; i++) {
- adreno_regread(device, hang_detect_regs[i],
- &curr_reg_val[i]);
- if (curr_reg_val[i] != prev_reg_val[i]) {
- prev_reg_val[i] = curr_reg_val[i];
- hang_detected = 0;
+ if (is_adreno_rbbm_status_idle(device)) {
+
+ /*
+ * On A20X if the RPTR != WPTR and the device is idle, then
+ * the last write to WPTR probably failed to latch so write it
+ * again
+ */
+
+ if (adreno_is_a2xx(adreno_dev)) {
+ unsigned int rptr;
+ adreno_regread(device, REG_CP_RB_RPTR, &rptr);
+ if (rptr != adreno_dev->ringbuffer.wptr)
+ adreno_regwrite(device, REG_CP_RB_WPTR,
+ adreno_dev->ringbuffer.wptr);
}
+
+ return 0;
}
- return hang_detected;
+ /*
+ * The time interval between hang detection checks should be
+ * KGSL_TIMEOUT_PART or more; if the next check is requested less than
+ * KGSL_TIMEOUT_PART after the last one, do nothing.
+ */
+ if ((next_hang_detect_time) &&
+ (time_before(jiffies, next_hang_detect_time)))
+ return 0;
+ else
+ next_hang_detect_time = (jiffies +
+ msecs_to_jiffies(KGSL_TIMEOUT_PART-1));
+
+ /* Read the current Hang detect reg values here */
+ for (i = 0; i < ft_detect_regs_count; i++) {
+ if (ft_detect_regs[i] == 0)
+ continue;
+ adreno_regread(device, ft_detect_regs[i],
+ &curr_reg_val[i]);
+ }
+
+ /* Read the current global timestamp here */
+ kgsl_sharedmem_readl(&device->memstore,
+ &curr_global_ts,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp));
+
+ mb();
+
+ if (curr_global_ts == prev_global_ts) {
+
+ /* Get the current context here */
+ if (context == NULL) {
+ kgsl_sharedmem_readl(&device->memstore,
+ &curr_context_id,
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ current_context));
+ context = idr_find(&device->context_idr,
+ curr_context_id);
+ if (context != NULL) {
+ curr_context = context->devctxt;
+ curr_context->ib_gpu_time_used = 0;
+ } else {
+ KGSL_DRV_ERR(device,
+ "Fault tolerance no context found\n");
+ }
+ }
+
+ mb();
+
+ if (curr_context != NULL) {
+
+ curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART;
+ KGSL_FT_INFO(device,
+ "Proc %s used GPU Time %d ms on timestamp 0x%X\n",
+ curr_context->pid_name, curr_context->ib_gpu_time_used,
+ curr_global_ts+1);
+
+ for (i = 0; i < ft_detect_regs_count; i++) {
+ if (curr_reg_val[i] != prev_reg_val[i]) {
+ fast_hang_detected = 0;
+
+ /* Check for long IB here */
+ if ((i >=
+ LONG_IB_DETECT_REG_INDEX_START)
+ &&
+ (i <=
+ LONG_IB_DETECT_REG_INDEX_END))
+ long_ib_detected = 0;
+ }
+ }
+
+ if (fast_hang_detected) {
+ KGSL_FT_ERR(device,
+ "Proc %s, ctxt_id %d ts %d triggered fault tolerance"
+ " on global ts %d\n",
+ curr_context->pid_name, curr_context->id
+ , (kgsl_readtimestamp(device, context,
+ KGSL_TIMESTAMP_RETIRED)+1),
+ curr_global_ts+1);
+ return 1;
+ }
+
+ if ((long_ib_detected) &&
+ (!(curr_context->flags &
+ CTXT_FLAGS_NO_FAULT_TOLERANCE))) {
+ curr_context->ib_gpu_time_used +=
+ KGSL_TIMEOUT_PART;
+ if (curr_context->ib_gpu_time_used >
+ KGSL_TIMEOUT_LONG_IB_DETECTION) {
+ if (adreno_dev->long_ib_ts !=
+ curr_global_ts) {
+ KGSL_FT_ERR(device,
+ "Proc %s, ctxt_id %d ts %d"
+ "used GPU for %d ms long ib "
+ "detected on global ts %d\n",
+ curr_context->pid_name,
+ curr_context->id,
+ (kgsl_readtimestamp(device,
+ context,
+ KGSL_TIMESTAMP_RETIRED)+1),
+ curr_context->ib_gpu_time_used,
+ curr_global_ts+1);
+ adreno_dev->long_ib = 1;
+ adreno_dev->long_ib_ts =
+ curr_global_ts;
+ curr_context->ib_gpu_time_used =
+ 0;
+ return 1;
+ }
+ }
+ }
+ } else {
+ KGSL_FT_ERR(device,
+ "Last context unknown id:%d\n",
+ curr_context_id);
+ }
+ } else {
+ /* GPU is moving forward */
+ prev_global_ts = curr_global_ts;
+ context = NULL;
+ curr_context = NULL;
+ adreno_dev->long_ib = 0;
+ adreno_dev->long_ib_ts = 0;
+ }
+
+
+ /* If no hang is detected, copy the current register values
+ * to the previous values and return no hang */
+ for (i = 0; i < ft_detect_regs_count; i++)
+ prev_reg_val[i] = curr_reg_val[i];
+ return 0;
}
+/**
+ * adreno_handle_hang - Process a hang detected in adreno_waittimestamp
+ * @device - pointer to a KGSL device structure
+ * @context - pointer to the active KGSL context
+ * @timestamp - the timestamp that the process was waiting for
+ *
+ * Process a possible GPU hang and try to recover from it cleanly
+ * using fault tolerance
+ */
+static int adreno_handle_hang(struct kgsl_device *device,
+ struct kgsl_context *context, unsigned int timestamp)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ unsigned int context_id = _get_context_id(context);
+ unsigned int ts_issued;
+ unsigned int rptr;
+ /* Do one last check to see if we somehow made it through */
+ if (kgsl_check_timestamp(device, context, timestamp))
+ return 0;
+
+ ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
+
+ adreno_regread(device, REG_CP_RB_RPTR, &rptr);
+ mb();
+
+ KGSL_DRV_WARN(device,
+ "Device hang detected while waiting for timestamp: "
+ "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, "
+ "retired timestamp: <%d:0x%x>, wptr: 0x%x, rptr: 0x%x\n",
+ context_id, timestamp, context_id, ts_issued, context_id,
+ kgsl_readtimestamp(device, context,
+ KGSL_TIMESTAMP_RETIRED),
+ adreno_dev->ringbuffer.wptr, rptr);
+
+ /* Return 0 after a successful fault tolerance */
+ if (!adreno_dump_and_exec_ft(device))
+ return 0;
+
+ return -ETIMEDOUT;
+}
+
+static int _check_pending_timestamp(struct kgsl_device *device,
+ struct kgsl_context *context, unsigned int timestamp)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ unsigned int context_id = _get_context_id(context);
+ unsigned int ts_issued;
+
+ if (context_id == KGSL_CONTEXT_INVALID)
+ return -EINVAL;
+
+ ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
+
+ if (timestamp_cmp(timestamp, ts_issued) <= 0)
+ return 0;
+
+ if (context && !context->wait_on_invalid_ts) {
+ KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, last issued ts <%d:0x%x>\n",
+ context_id, timestamp, context_id, ts_issued);
+
+ /* Only print this message once */
+ context->wait_on_invalid_ts = true;
+ }
+
+ return -EINVAL;
+}
+
+/**
+ * adreno_waittimestamp - sleep while waiting for the specified timestamp
+ * @device - pointer to a KGSL device structure
+ * @context - pointer to the active kgsl context
+ * @timestamp - GPU timestamp to wait for
+ * @msecs - amount of time to wait (in milliseconds)
+ *
+ * Wait 'msecs' milliseconds for the specified timestamp to expire. Wake up
+ * every KGSL_TIMEOUT_PART milliseconds to check for a device hang and process
+ * one if it happened. Otherwise, spend most of our time in an interruptible
+ * wait for the timestamp interrupt to be processed. This function must be
+ * called with the mutex already held.
+ */
static int adreno_waittimestamp(struct kgsl_device *device,
struct kgsl_context *context,
unsigned int timestamp,
unsigned int msecs)
{
- long status = 0;
- uint io = 1;
- static uint io_cnt;
- struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ static unsigned int io_cnt;
+ struct adreno_context *adreno_ctx = context ? context->devctxt : NULL;
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
- int retries = 0;
- unsigned int ts_issued;
unsigned int context_id = _get_context_id(context);
+ unsigned int prev_reg_val[ft_detect_regs_count];
unsigned int time_elapsed = 0;
- unsigned int prev_reg_val[hang_detect_regs_count];
unsigned int wait;
+ int ts_compare = 1;
+ int io, ret = -ETIMEDOUT;
- memset(prev_reg_val, 0, sizeof(prev_reg_val));
+ /* Get out early if the context has already been destroyed */
- ts_issued = adreno_dev->ringbuffer.timestamp[context_id];
+ if (context_id == KGSL_CONTEXT_INVALID) {
+ KGSL_DRV_WARN(device, "context was detached");
+ return -EINVAL;
+ }
-
- if (msecs == KGSL_TIMEOUT_DEFAULT)
- msecs = adreno_dev->wait_timeout;
+ /*
+ * Check to see if the requested timestamp is "newer" than the last
+ * timestamp issued. If it is, complain once and return an error. Only
+ * print the message once per context so that badly behaving
+ * applications don't spam the logs
+ */
- if (timestamp_cmp(timestamp, ts_issued) > 0) {
- KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, "
- "last issued ts <%d:0x%x>\n",
- context_id, timestamp, context_id, ts_issued);
- status = -EINVAL;
- goto done;
+ if (adreno_ctx && !(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) {
+ if (_check_pending_timestamp(device, context, timestamp))
+ return -EINVAL;
+
+ /* Reset the invalid timestamp flag on a valid wait */
+ context->wait_on_invalid_ts = false;
}
- if (msecs == 0 || msecs >= 100)
- wait = 100;
- else
- wait = 20;
+ /* Clear the registers used for hang detection */
+ memset(prev_reg_val, 0, sizeof(prev_reg_val));
+
+ /*
+ * On the first time through the loop only wait 100ms.
+ * This gives enough time for the engine to start moving and oddly
+ * provides better hang detection results than just going the full
+ * KGSL_TIMEOUT_PART right off the bat. The exception to this rule
+ * is if msecs happens to be < 100ms; in that case just use the full timeout
+ */
+
+ wait = 100;
do {
- if (context_id == KGSL_CONTEXT_INVALID) {
- KGSL_DRV_WARN(device, "context was detached");
- status = -EINVAL;
- goto done;
- }
+ long status;
+
+ /*
+ * if the timestamp happens while we're not
+ * waiting, there's a chance that an interrupt
+ * will not be generated and thus the timestamp
+ * work needs to be queued.
+ */
+
if (kgsl_check_timestamp(device, context, timestamp)) {
queue_work(device->work_queue, &device->ts_expired_ws);
- status = 0;
- goto done;
+ ret = 0;
+ break;
}
- adreno_poke(device);
- io_cnt = (io_cnt + 1) % 100;
- if (io_cnt <
- pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
- io = 0;
- if ((retries > 0) &&
- (adreno_hang_detect(device, prev_reg_val)))
- goto hang_dump;
+ /* Check to see if the GPU is hung */
+ if (adreno_ft_detect(device, prev_reg_val)) {
+ ret = adreno_handle_hang(device, context, timestamp);
+ break;
+ }
+
+ /*
+ * For proper power accounting sometimes we need to call
+ * io_wait_interruptible_timeout and sometimes we need to call
+ * plain old wait_interruptible_timeout. We call the regular
+ * timeout N times out of 100, where N is a number specified by
+ * the current power level
+ */
+
+ io_cnt = (io_cnt + 1) % 100;
+ io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
+ ? 0 : 1;
mutex_unlock(&device->mutex);
+
+ /* Wait for a timestamp event */
status = kgsl_wait_event_interruptible_timeout(
- device->wait_queue,
- kgsl_check_interrupt_timestamp(device,
- context, timestamp),
- msecs_to_jiffies(wait), io);
+ device->wait_queue,
+ adreno_check_interrupt_timestamp(device, context,
+ timestamp), msecs_to_jiffies(wait), io);
mutex_lock(&device->mutex);
- if (status > 0) {
-
- status = 0;
- goto done;
- } else if (status < 0) {
-
- goto done;
+ /*
+ * If status is non-zero then either the condition was satisfied
+ * or there was an error. In either event, this is the end of
+ * the line for us
+ */
+
+ if (status != 0) {
+ ret = (status > 0) ? 0 : (int) status;
+ break;
}
-
-
time_elapsed += wait;
- wait = KGSL_TIMEOUT_PART;
- retries++;
+ /* If user-specified timestamps are being used, wait at least
+ * KGSL_SYNCOBJ_SERVER_TIMEOUT msecs for the user driver to
+ * issue an IB for a timestamp before checking to see if the
+ * current timestamp we are waiting for is valid or not
+ */
+
+ if (ts_compare && (adreno_ctx &&
+ (adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS))) {
+ if (time_elapsed > KGSL_SYNCOBJ_SERVER_TIMEOUT) {
+ ret = _check_pending_timestamp(device, context,
+ timestamp);
+ if (ret)
+ break;
+
+ /* Don't do this check again */
+ ts_compare = 0;
+
+ /*
+ * Reset the invalid timestamp flag on a valid
+ * wait
+ */
+ context->wait_on_invalid_ts = false;
+ }
+ }
+
+ /*
+ * We want to wait the floor of KGSL_TIMEOUT_PART
+ * and (msecs - time_elapsed).
+ */
+
+ if (KGSL_TIMEOUT_PART < (msecs - time_elapsed))
+ wait = KGSL_TIMEOUT_PART;
+ else
+ wait = (msecs - time_elapsed);
} while (!msecs || time_elapsed < msecs);
-hang_dump:
- if (kgsl_check_timestamp(device, context, timestamp))
- goto done;
- status = -ETIMEDOUT;
- KGSL_DRV_ERR(device,
- "Device hang detected while waiting for timestamp: "
- "<%d:0x%x>, last submitted timestamp: <%d:0x%x>, "
- "wptr: 0x%x\n",
- context_id, timestamp, context_id, ts_issued,
- adreno_dev->ringbuffer.wptr);
- if (!adreno_dump_and_recover(device)) {
- status = 0;
- }
-done:
- return (int)status;
+ return ret;
}
static unsigned int adreno_readtimestamp(struct kgsl_device *device,
@@ -1776,6 +3073,10 @@
unsigned int timestamp = 0;
unsigned int context_id = _get_context_id(context);
+ /*
+ * If the context ID is invalid, we are in a race with
+ * the context being destroyed by userspace so bail.
+ */
if (context_id == KGSL_CONTEXT_INVALID) {
KGSL_DRV_WARN(device, "context was detached");
return timestamp;
@@ -1849,11 +3150,13 @@
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
unsigned int cycles;
-
-
+ /* Get the busy cycles counted since the counter was last reset */
+ /* Calling this function also resets and restarts the counter */
cycles = adreno_dev->gpudev->busy_cycles(adreno_dev);
+ /* In order to calculate idle you have to have run the algorithm
+ * at least once to get a start time. */
if (pwr->time != 0) {
s64 tmp = ktime_to_us(ktime_get());
stats->total_time = tmp - pwr->time;
@@ -1861,27 +3164,6 @@
stats->busy_time = adreno_ticks_to_us(cycles, device->pwrctrl.
pwrlevels[device->pwrctrl.active_pwrlevel].
gpu_freq);
-
-
- stats->busy_time = (stats->busy_time > stats->total_time) ? stats->total_time : stats->busy_time;
- device->gputime.total = device->gputime.total + stats->total_time;
- device->gputime.busy = device->gputime.busy + stats->busy_time;
- device->gputime_in_state[device->pwrctrl.active_pwrlevel].total
- = device->gputime_in_state[device->pwrctrl.active_pwrlevel].total + stats->total_time;
- device->gputime_in_state[device->pwrctrl.active_pwrlevel].busy
- = device->gputime_in_state[device->pwrctrl.active_pwrlevel].busy + stats->busy_time;
-
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- if(device->current_process_priv != NULL) {
- device->current_process_priv->gputime.total = device->current_process_priv->gputime.total + stats->total_time;
- device->current_process_priv->gputime.busy = device->current_process_priv->gputime.busy + stats->busy_time;
- device->current_process_priv->gputime_in_state[device->pwrctrl.active_pwrlevel].total
- = device->current_process_priv->gputime_in_state[device->pwrctrl.active_pwrlevel].total + stats->total_time;
- device->current_process_priv->gputime_in_state[device->pwrctrl.active_pwrlevel].busy
- = device->current_process_priv->gputime_in_state[device->pwrctrl.active_pwrlevel].busy + stats->busy_time;
- } else
- printk("curent_process_pirv = NULL, skip gpu usage recorde.\n");
-#endif
} else {
stats->total_time = 0;
stats->busy_time = 0;
@@ -1900,16 +3182,22 @@
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ /* Some applications need to know the chip ID too, so pass
+ * that as a parameter */
if (chipid != NULL)
*chipid = adreno_dev->chip_id;
+ /* Standard KGSL gpuid format:
+ * top word is 0x0002 for 2D or 0x0003 for 3D
+ * bottom word is a core-specific identifier
+ */
return (0x0003 << 16) | ((int) adreno_dev->gpurev);
}
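The comment above spells out the packed gpuid layout: 0x0003 in the top half for a 3D core and the adreno_gpurev value in the bottom half. A small illustrative decode; the helper name is hypothetical and not part of the driver:

    /* Hypothetical helper, for illustration only. */
    static inline void example_split_gpuid(unsigned int gpuid,
                                           unsigned int *klass,
                                           unsigned int *core)
    {
            *klass = gpuid >> 16;    /* 0x0002 = 2D core, 0x0003 = 3D core */
            *core  = gpuid & 0xFFFF; /* e.g. 320 for ADRENO_REV_A320 */
    }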
static const struct kgsl_functable adreno_functable = {
-
+ /* Mandatory functions */
.regread = adreno_regread,
.regwrite = adreno_regwrite,
.idle = adreno_idle,
@@ -1929,19 +3217,15 @@
.gpuid = adreno_gpuid,
.snapshot = adreno_snapshot,
.irq_handler = adreno_irq_handler,
-
+ /* Optional functions */
.setstate = adreno_setstate,
.drawctxt_create = adreno_drawctxt_create,
.drawctxt_destroy = adreno_drawctxt_destroy,
.setproperty = adreno_setproperty,
+ .postmortem_dump = adreno_dump,
+ .next_event = adreno_next_event,
};
-static struct platform_device_id adreno_id_table[] = {
- { DEVICE_3D0_NAME, (kernel_ulong_t)&device_3d0.dev, },
- { },
-};
-MODULE_DEVICE_TABLE(platform, adreno_id_table);
-
static struct platform_driver adreno_platform_driver = {
.probe = adreno_probe,
.remove = __devexit_p(adreno_remove),
@@ -1952,6 +3236,7 @@
.owner = THIS_MODULE,
.name = DEVICE_3D_NAME,
.pm = &kgsl_pm_ops,
+ .of_match_table = adreno_match_table,
}
};
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 3cfacd8..d319c98 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -17,6 +17,7 @@
#include "adreno_drawctxt.h"
#include "adreno_ringbuffer.h"
#include "kgsl_iommu.h"
+#include <mach/ocmem.h>
#define DEVICE_3D_NAME "kgsl-3d"
#define DEVICE_3D0_NAME "kgsl-3d0"
@@ -24,14 +25,24 @@
#define ADRENO_DEVICE(device) \
KGSL_CONTAINER_OF(device, struct adreno_device, dev)
+#define ADRENO_CHIPID_CORE(_id) (((_id) >> 24) & 0xFF)
+#define ADRENO_CHIPID_MAJOR(_id) (((_id) >> 16) & 0xFF)
+#define ADRENO_CHIPID_MINOR(_id) (((_id) >> 8) & 0xFF)
+#define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF)
+
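As a quick illustration of the ADRENO_CHIPID_* macros above, a hypothetical chip_id value of 0x03000520 would decode as shown below; the value itself is made up for the example and is not taken from any real device:

    /* Illustrative only; the chip_id value below is hypothetical. */
    static void example_decode_chipid(void)
    {
            unsigned int example_chipid = 0x03000520;
            unsigned int core  = ADRENO_CHIPID_CORE(example_chipid);  /* 0x03 */
            unsigned int major = ADRENO_CHIPID_MAJOR(example_chipid); /* 0x00 */
            unsigned int minor = ADRENO_CHIPID_MINOR(example_chipid); /* 0x05 */
            unsigned int patch = ADRENO_CHIPID_PATCH(example_chipid); /* 0x20 */
            (void)core; (void)major; (void)minor; (void)patch;
    }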
+/* Flags to control command packet settings */
#define KGSL_CMD_FLAGS_NONE 0x00000000
#define KGSL_CMD_FLAGS_PMODE 0x00000001
-#define KGSL_CMD_FLAGS_NO_TS_CMP 0x00000002
+#define KGSL_CMD_FLAGS_INTERNAL_ISSUE 0x00000002
+#define KGSL_CMD_FLAGS_EOF 0x00000100
+/* Command identifiers */
#define KGSL_CONTEXT_TO_MEM_IDENTIFIER 0x2EADBEEF
#define KGSL_CMD_IDENTIFIER 0x2EEDFACE
#define KGSL_START_OF_IB_IDENTIFIER 0x2EADEABE
#define KGSL_END_OF_IB_IDENTIFIER 0x2ABEDEAD
+#define KGSL_END_OF_FRAME_IDENTIFIER 0x2E0F2E0F
+#define KGSL_NOP_IB_IDENTIFIER 0x20F20F20
#ifdef CONFIG_MSM_SCM
#define ADRENO_DEFAULT_PWRSCALE_POLICY (&kgsl_pwrscale_policy_tz)
@@ -41,10 +52,15 @@
#define ADRENO_DEFAULT_PWRSCALE_POLICY NULL
#endif
-#define ADRENO_ISTORE_START 0x5000
+void adreno_debugfs_init(struct kgsl_device *device);
+
+#define ADRENO_ISTORE_START 0x5000 /* Istore offset */
#define ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW 50
+/* One cannot wait forever for the core to idle, so set an upper limit to the
+ * amount of time to wait for the core to go idle
+ */
#define ADRENO_IDLE_TIMEOUT (20 * 1000)
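ADRENO_IDLE_TIMEOUT is expressed in milliseconds; the idle loop earlier in this patch converts it with msecs_to_jiffies() and re-checks for faults every KGSL_TIMEOUT_PART. A condensed sketch of that deadline/poll pattern, with the two check functions left as hypothetical stand-ins (not real driver functions):

    /* Illustrative only: mirrors the adreno_idle() wait loop in this patch. */
    static int example_wait_for_idle(void)
    {
            unsigned long wait_time = jiffies +
                    msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
            unsigned long part_time = jiffies +
                    msecs_to_jiffies(KGSL_TIMEOUT_PART);

            while (time_before(jiffies, wait_time)) {
                    if (example_core_is_idle())             /* hypothetical check */
                            return 0;
                    if (time_after(jiffies, part_time)) {
                            part_time = jiffies +
                                    msecs_to_jiffies(KGSL_TIMEOUT_PART);
                            if (example_fault_detected())   /* hypothetical check */
                                    break;  /* let fault tolerance take over */
                    }
            }
            return -ETIMEDOUT;
    }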
@@ -57,12 +73,13 @@
ADRENO_REV_A225 = 225,
ADRENO_REV_A305 = 305,
ADRENO_REV_A320 = 320,
+ ADRENO_REV_A330 = 330,
};
struct adreno_gpudev;
struct adreno_device {
- struct kgsl_device dev;
+ struct kgsl_device dev; /* Must be first field in this struct */
unsigned int chip_id;
enum adreno_gpurev gpurev;
unsigned long gmem_base;
@@ -85,17 +102,29 @@
unsigned int instruction_size;
unsigned int ib_check_level;
unsigned int fast_hang_detect;
+ unsigned int ft_policy;
+ unsigned int ft_user_control;
+ unsigned int long_ib_detect;
+ unsigned int long_ib;
+ unsigned int long_ib_ts;
+ unsigned int ft_pf_policy;
unsigned int gpulist_index;
+ struct ocmem_buf *ocmem_hdl;
+ unsigned int ocmem_base;
};
struct adreno_gpudev {
+ /*
+ * These registers are in a different location on A3XX, so define
+ * them in the structure and use them as variables.
+ */
unsigned int reg_rbbm_status;
unsigned int reg_cp_pfp_ucode_data;
unsigned int reg_cp_pfp_ucode_addr;
-
+ /* keeps track of when we need to execute the draw workaround code */
int ctx_switches_since_last_draw;
-
+ /* GPU specific function hooks */
int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
void (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
@@ -103,13 +132,36 @@
struct adreno_context *);
irqreturn_t (*irq_handler)(struct adreno_device *);
void (*irq_control)(struct adreno_device *, int);
+ unsigned int (*irq_pending)(struct adreno_device *);
void * (*snapshot)(struct adreno_device *, void *, int *, int);
void (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
void (*start)(struct adreno_device *);
unsigned int (*busy_cycles)(struct adreno_device *);
};
-struct adreno_recovery_data {
+/*
+ * struct adreno_ft_data - Structure that contains all information to
+ * perform gpu fault tolerance
+ * @ib1 - IB1 that the GPU was executing when hang happened
+ * @context_id - Context which caused the hang
+ * @global_eop - eoptimestamp at time of hang
+ * @rb_buffer - Buffer that holds the commands from good contexts
+ * @rb_size - Number of valid dwords in rb_buffer
+ * @bad_rb_buffer - Buffer that holds commands from the hanging context
+ * @bad_rb_size - Number of valid dwords in bad_rb_buffer
+ * @good_rb_buffer - Buffer that holds commands from good contexts
+ * @good_rb_size - Number of valid dwords in good_rb_buffer
+ * @last_valid_ctx_id - The last context from which commands were placed in
+ * ringbuffer before the GPU hung
+ * @step - Current fault tolerance step being executed
+ * @err_code - Fault tolerance error code
+ * @fault - Indicates whether the hang was caused by a pagefault
+ * @start_of_replay_cmds - Offset in ringbuffer from where commands can be
+ * replayed during fault tolerance
+ * @replay_for_snapshot - Offset in ringbuffer where IB's can be saved for
+ * replaying with snapshot
+ */
+struct adreno_ft_data {
unsigned int ib1;
unsigned int context_id;
unsigned int global_eop;
@@ -117,12 +169,20 @@
unsigned int rb_size;
unsigned int *bad_rb_buffer;
unsigned int bad_rb_size;
+ unsigned int *good_rb_buffer;
+ unsigned int good_rb_size;
unsigned int last_valid_ctx_id;
+ unsigned int status;
+ unsigned int ft_policy;
+ unsigned int err_code;
+ unsigned int start_of_replay_cmds;
+ unsigned int replay_for_snapshot;
};
extern struct adreno_gpudev adreno_a2xx_gpudev;
extern struct adreno_gpudev adreno_a3xx_gpudev;
+/* A2XX register sets defined in adreno_a2xx.c */
extern const unsigned int a200_registers[];
extern const unsigned int a220_registers[];
extern const unsigned int a225_registers[];
@@ -130,14 +190,18 @@
extern const unsigned int a220_registers_count;
extern const unsigned int a225_registers_count;
+/* A3XX register set defined in adreno_a3xx.c */
extern const unsigned int a3xx_registers[];
extern const unsigned int a3xx_registers_count;
extern const unsigned int a3xx_hlsq_registers[];
extern const unsigned int a3xx_hlsq_registers_count;
-extern unsigned int hang_detect_regs[];
-extern const unsigned int hang_detect_regs_count;
+extern const unsigned int a330_registers[];
+extern const unsigned int a330_registers_count;
+
+extern unsigned int ft_detect_regs[];
+extern const unsigned int ft_detect_regs_count;
int adreno_idle(struct kgsl_device *device);
@@ -146,6 +210,8 @@
void adreno_regwrite(struct kgsl_device *device, unsigned int offsetwords,
unsigned int value);
+int adreno_dump(struct kgsl_device *device, int manual);
+
struct kgsl_memdesc *adreno_find_region(struct kgsl_device *device,
unsigned int pt_base,
unsigned int gpuaddr,
@@ -160,9 +226,12 @@
void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
int hang);
-int adreno_dump_and_recover(struct kgsl_device *device);
+int adreno_dump_and_exec_ft(struct kgsl_device *device);
-unsigned int adreno_hang_detect(struct kgsl_device *device,
+void adreno_dump_rb(struct kgsl_device *device, const void *buf,
+ size_t len, int start, int size);
+
+unsigned int adreno_ft_detect(struct kgsl_device *device,
unsigned int *prev_reg_val);
static inline int adreno_is_a200(struct adreno_device *adreno_dev)
@@ -221,15 +290,33 @@
return (adreno_dev->gpurev == ADRENO_REV_A320);
}
+static inline int adreno_is_a330(struct adreno_device *adreno_dev)
+{
+ return (adreno_dev->gpurev == ADRENO_REV_A330);
+}
+
static inline int adreno_rb_ctxtswitch(unsigned int *cmd)
{
return (cmd[0] == cp_nop_packet(1) &&
cmd[1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER);
}
+/**
+ * adreno_encode_istore_size - encode istore size in CP format
+ * @adreno_dev - The 3D device.
+ *
+ * Encode the istore size into the format expected by the
+ * CP_SET_SHADER_BASES and CP_ME_INIT commands:
+ * bits 31:29 - istore size as encoded by this function
+ * bits 27:16 - vertex shader start offset in instructions
+ * bits 11:0 - pixel shader start offset in instructions.
+ */
static inline int adreno_encode_istore_size(struct adreno_device *adreno_dev)
{
unsigned int size;
+ /* in a225 the CP microcode multiplies the encoded
+ * value by 3 while decoding.
+ */
if (adreno_is_a225(adreno_dev))
size = adreno_dev->istore_size/3;
else
@@ -241,6 +328,10 @@
static inline int __adreno_add_idle_indirect_cmds(unsigned int *cmds,
unsigned int nop_gpuaddr)
{
+ /* Adding an indirect buffer ensures that the prefetch stalls until
+ * the commands in the indirect buffer have completed. We need to stall
+ * prefetch with a nop indirect buffer when updating pagetables
+ * because it provides more stable synchronization */
*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
*cmds++ = nop_gpuaddr;
*cmds++ = 2;
@@ -255,7 +346,6 @@
{
unsigned int *start = cmds;
- cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
*cmds++ = cp_type0_packet(MH_MMU_MPU_END, 1);
*cmds++ = new_phys_limit;
cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
@@ -268,13 +358,20 @@
{
unsigned int *start = cmds;
- cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
*cmds++ = cp_type0_packet(REG_CP_STATE_DEBUG_INDEX, 1);
*cmds++ = (cur_ctx_bank ? 0 : 0x20);
cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
return cmds - start;
}
+/*
+ * adreno_add_read_cmds - Add pm4 packets to perform a read
+ * @device - Pointer to device structure
+ * @cmds - Pointer to memory where read commands need to be added
+ * @addr - gpu address of the read
+ * @val - The GPU will wait until the data at address addr becomes
+ * equal to value
+ */
static inline int adreno_add_read_cmds(struct kgsl_device *device,
unsigned int *cmds, unsigned int addr,
unsigned int val, unsigned int nop_gpuaddr)
@@ -282,7 +379,7 @@
unsigned int *start = cmds;
*cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
-
+ /* MEM SPACE = memory, FUNCTION = equals */
*cmds++ = 0x13;
*cmds++ = addr;
*cmds++ = val;
@@ -292,6 +389,11 @@
return cmds - start;
}
+/*
+ * adreno_add_idle_cmds - Add pm4 packets for GPU idle
+ * @adreno_dev - Pointer to device structure
+ * @cmds - Pointer to memory where idle commands need to be added
+ */
static inline int adreno_add_idle_cmds(struct adreno_device *adreno_dev,
unsigned int *cmds)
{
@@ -309,4 +411,4 @@
return cmds - start;
}
-#endif
+#endif /*__ADRENO_H */
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index d224a21..ba4e507 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -21,7 +21,13 @@
#include "adreno.h"
#include "adreno_a2xx_trace.h"
+/*
+ * These are the registers that are dumped with GPU snapshot
+ * and postmortem. The lists are dword offset pairs in the
+ * form of {start offset, end offset} inclusive.
+ */
+/* A200, A205 */
const unsigned int a200_registers[] = {
0x0000, 0x0002, 0x0004, 0x000B, 0x003B, 0x003D, 0x0040, 0x0044,
0x0046, 0x0047, 0x01C0, 0x01C1, 0x01C3, 0x01C8, 0x01D5, 0x01D9,
@@ -114,29 +120,74 @@
const unsigned int a220_registers_count = ARRAY_SIZE(a220_registers) / 2;
const unsigned int a225_registers_count = ARRAY_SIZE(a225_registers) / 2;
+/*
+ *
+ * Memory Map for Register, Constant & Instruction Shadow, and Command Buffers
+ * (34.5KB)
+ *
+ * +---------------------+------------+-------------+---+---------------------+
+ * | ALU Constant Shadow | Reg Shadow | C&V Buffers |Tex| Shader Instr Shadow |
+ * +---------------------+------------+-------------+---+---------------------+
+ * ________________________________/ \____________________
+ * / |
+ * +--------------+-----------+------+-----------+------------------------+
+ * | Restore Regs | Save Regs | Quad | Gmem Save | Gmem Restore | unused |
+ * +--------------+-----------+------+-----------+------------------------+
+ *
+ * 8K - ALU Constant Shadow (8K aligned)
+ * 4K - H/W Register Shadow (8K aligned)
+ * 4K - Command and Vertex Buffers
+ * - Indirect command buffer : Const/Reg restore
+ * - includes Loop & Bool const shadows
+ * - Indirect command buffer : Const/Reg save
+ * - Quad vertices & texture coordinates
+ * - Indirect command buffer : Gmem save
+ * - Indirect command buffer : Gmem restore
+ * - Unused (padding to 8KB boundary)
+ * <1K - Texture Constant Shadow (768 bytes) (8K aligned)
+ * 18K - Shader Instruction Shadow
+ * - 6K vertex (32 byte aligned)
+ * - 6K pixel (32 byte aligned)
+ * - 6K shared (32 byte aligned)
+ *
+ * Note: Reading constants into a shadow, one at a time using REG_TO_MEM, takes
+ * 3 DWORDS per DWORD transferred, plus 1 DWORD for the shadow, for a total of
+ * 16 bytes per constant. If the texture constants were transferred this way,
+ * the Command & Vertex Buffers section would extend past the 16K boundary.
+ * By moving the texture constant shadow area to start at 16KB boundary, we
+ * only require approximately 40 bytes more memory, but are able to use the
+ * LOAD_CONSTANT_CONTEXT shadowing feature for the textures, speeding up
+ * context switching.
+ *
+ * [Using LOAD_CONSTANT_CONTEXT shadowing feature for the Loop and/or Bool
+ * constants would require an additional 8KB each, for alignment.]
+ *
+ */
+/* Constants */
-#define ALU_CONSTANTS 2048
-#define NUM_REGISTERS 1024
+#define ALU_CONSTANTS 2048 /* DWORDS */
+#define NUM_REGISTERS 1024 /* DWORDS */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-#define CMD_BUFFER_LEN 9216
+#define CMD_BUFFER_LEN 9216 /* DWORDS */
#else
-#define CMD_BUFFER_LEN 3072
+#define CMD_BUFFER_LEN 3072 /* DWORDS */
#endif
-#define TEX_CONSTANTS (32*6)
-#define BOOL_CONSTANTS 8
-#define LOOP_CONSTANTS 56
+#define TEX_CONSTANTS (32*6) /* DWORDS */
+#define BOOL_CONSTANTS 8 /* DWORDS */
+#define LOOP_CONSTANTS 56 /* DWORDS */
-#define LCC_SHADOW_SIZE 0x2000
+/* LOAD_CONSTANT_CONTEXT shadow size */
+#define LCC_SHADOW_SIZE 0x2000 /* 8KB */
-#define ALU_SHADOW_SIZE LCC_SHADOW_SIZE
-#define REG_SHADOW_SIZE 0x1000
+#define ALU_SHADOW_SIZE LCC_SHADOW_SIZE /* 8KB */
+#define REG_SHADOW_SIZE 0x1000 /* 4KB */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-#define CMD_BUFFER_SIZE 0x9000
+#define CMD_BUFFER_SIZE 0x9000 /* 36KB */
#else
-#define CMD_BUFFER_SIZE 0x3000
+#define CMD_BUFFER_SIZE 0x3000 /* 12KB */
#endif
-#define TEX_SHADOW_SIZE (TEX_CONSTANTS*4)
+#define TEX_SHADOW_SIZE (TEX_CONSTANTS*4) /* 768 bytes */
#define REG_OFFSET LCC_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET + REG_SHADOW_SIZE)
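Putting the sizes above together (with CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES unset), the first two offsets work out as a quick sanity check; this is illustrative arithmetic only, not part of the patch:

    /*
     * Illustrative arithmetic:
     *   REG_OFFSET = LCC_SHADOW_SIZE              = 0x2000 (8KB ALU shadow)
     *   CMD_OFFSET = REG_OFFSET + REG_SHADOW_SIZE = 0x2000 + 0x1000 = 0x3000
     * so the command & vertex buffers start 12KB into the per-context
     * gpustate allocation.
     */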
@@ -154,27 +205,40 @@
return SHADER_OFFSET + 3*_shader_shadow_size(adreno_dev);
}
+/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
- unsigned int *start;
- unsigned int *cmd;
+ unsigned int *start; /* Command & Vertex buffer start */
+ unsigned int *cmd; /* Next available dword in C&V buffer */
-
- uint32_t bool_shadow;
- uint32_t loop_shadow;
+ /* address of buffers, needed when creating IB1 command buffers. */
+ uint32_t bool_shadow; /* bool constants */
+ uint32_t loop_shadow; /* loop constants */
- uint32_t shader_shared;
- uint32_t shader_vertex;
- uint32_t shader_pixel;
+ uint32_t shader_shared; /* shared shader instruction shadow */
+ uint32_t shader_vertex; /* vertex shader instruction shadow */
+ uint32_t shader_pixel; /* pixel shader instruction shadow */
+ /* Addresses in command buffer where separately handled registers
+ * are saved
+ */
uint32_t reg_values[33];
uint32_t chicken_restore;
- uint32_t gmem_base;
+ uint32_t gmem_base; /* Base gpu address of GMEM */
} tmp_ctx;
+/* context save (gmem -> sys) */
+/* pre-compiled vertex shader program
+*
+* attribute vec4 P;
+* void main(void)
+* {
+* gl_Position = P;
+* }
+*/
#define GMEM2SYS_VTX_PGM_LEN 0x12
static unsigned int gmem2sys_vtx_pgm[GMEM2SYS_VTX_PGM_LEN] = {
@@ -186,6 +250,15 @@
0x14000000, 0x00000000, 0xe2000000
};
+/* pre-compiled fragment shader program
+*
+* precision highp float;
+* uniform vec4 clear_color;
+* void main(void)
+* {
+* gl_FragColor = clear_color;
+* }
+*/
#define GMEM2SYS_FRAG_PGM_LEN 0x0c
@@ -196,6 +269,18 @@
0x14000000, 0x00000000, 0xe2000000
};
+/* context restore (sys -> gmem) */
+/* pre-compiled vertex shader program
+*
+* attribute vec4 position;
+* attribute vec4 texcoord;
+* varying vec4 texcoord0;
+* void main()
+* {
+* gl_Position = position;
+* texcoord0 = texcoord;
+* }
+*/
#define SYS2GMEM_VTX_PGM_LEN 0x18
@@ -208,6 +293,16 @@
0xe2020200, 0x14000000, 0x00000000, 0xe2000000
};
+/* pre-compiled fragment shader program
+*
+* precision mediump float;
+* uniform sampler2D tex0;
+* varying vec4 texcoord0;
+* void main()
+* {
+* gl_FragColor = texture2D(tex0, texcoord0.xy);
+* }
+*/
#define SYS2GMEM_FRAG_PGM_LEN 0x0f
@@ -218,68 +313,85 @@
0x14000000, 0x00000000, 0xe2000000
};
+/* shader texture constants (sysmem -> gmem) */
#define SYS2GMEM_TEX_CONST_LEN 6
static unsigned int sys2gmem_tex_const[SYS2GMEM_TEX_CONST_LEN] = {
- 0x00000002,
+ /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
+ * RFMode=ZeroClamp-1, Dim=1:2d
+ */
+ 0x00000002, /* Pitch = TBD */
- 0x00000800,
+ /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
+ * NearestClamp=1:OGL Mode
+ */
+ 0x00000800, /* Address[31:12] = TBD */
-
- 0,
+ /* Width, Height, EndianSwap=0:None */
+ 0, /* Width & Height = TBD */
+ /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
+ * Mip=2:BaseMap
+ */
0 << 1 | 1 << 4 | 2 << 7 | 3 << 10 | 2 << 23,
+ /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
+ * Dim3d=0
+ */
0,
- 1 << 9
+ /* BorderColor=0:ABGRBlack, ForceBC=0:disable, TriJuice=0, Aniso=0,
+ * Dim=1:2d, MipPacking=0
+ */
+ 1 << 9 /* Mip Address[31:12] = TBD */
};
#define NUM_COLOR_FORMATS 13
static enum SURFACEFORMAT surface_format_table[NUM_COLOR_FORMATS] = {
- FMT_4_4_4_4,
- FMT_1_5_5_5,
- FMT_5_6_5,
- FMT_8,
- FMT_8_8,
- FMT_8_8_8_8,
- FMT_8_8_8_8,
- FMT_16_FLOAT,
- FMT_16_16_FLOAT,
- FMT_16_16_16_16_FLOAT,
- FMT_32_FLOAT,
- FMT_32_32_FLOAT,
- FMT_32_32_32_32_FLOAT,
+ FMT_4_4_4_4, /* COLORX_4_4_4_4 */
+ FMT_1_5_5_5, /* COLORX_1_5_5_5 */
+ FMT_5_6_5, /* COLORX_5_6_5 */
+ FMT_8, /* COLORX_8 */
+ FMT_8_8, /* COLORX_8_8 */
+ FMT_8_8_8_8, /* COLORX_8_8_8_8 */
+ FMT_8_8_8_8, /* COLORX_S8_8_8_8 */
+ FMT_16_FLOAT, /* COLORX_16_FLOAT */
+ FMT_16_16_FLOAT, /* COLORX_16_16_FLOAT */
+ FMT_16_16_16_16_FLOAT, /* COLORX_16_16_16_16_FLOAT */
+ FMT_32_FLOAT, /* COLORX_32_FLOAT */
+ FMT_32_32_FLOAT, /* COLORX_32_32_FLOAT */
+ FMT_32_32_32_32_FLOAT, /* COLORX_32_32_32_32_FLOAT */
};
static unsigned int format2bytesperpixel[NUM_COLOR_FORMATS] = {
- 2,
- 2,
- 2,
- 1,
- 2,
- 4,
- 4,
- 2,
- 4,
- 8,
- 4,
- 8,
- 16,
+ 2, /* COLORX_4_4_4_4 */
+ 2, /* COLORX_1_5_5_5 */
+ 2, /* COLORX_5_6_5 */
+ 1, /* COLORX_8 */
+ 2, /* COLORX_8_8 */
+ 4, /* COLORX_8_8_8_8 */
+ 4, /* COLORX_S8_8_8_8 */
+ 2, /* COLORX_16_FLOAT */
+ 4, /* COLORX_16_16_FLOAT */
+ 8, /* COLORX_16_16_16_16_FLOAT */
+ 4, /* COLORX_32_FLOAT */
+ 8, /* COLORX_32_32_FLOAT */
+ 16, /* COLORX_32_32_32_32_FLOAT */
};
+/* shader linkage info */
#define SHADER_CONST_ADDR (11 * 6 + 3)
static unsigned int *program_shader(unsigned int *cmds, int vtxfrag,
unsigned int *shader_pgm, int dwords)
{
-
+ /* load the patched vertex shader stream */
*cmds++ = cp_type3_packet(CP_IM_LOAD_IMMEDIATE, 2 + dwords);
-
+ /* 0=vertex shader, 1=fragment shader */
*cmds++ = vtxfrag;
-
+ /* instruction start & size (in 32-bit words) */
*cmds++ = ((0 << 16) | dwords);
memcpy(cmds, shader_pgm, dwords << 2);
@@ -320,6 +432,7 @@
#endif
+/* chicken restore */
static unsigned int *build_chicken_restore_cmds(
struct adreno_context *drawctxt)
{
@@ -333,12 +446,15 @@
tmp_ctx.chicken_restore = virt2gpu(cmds, &drawctxt->gpustate);
*cmds++ = 0x00000000;
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->chicken_restore, start, cmds);
return cmds;
}
+/****************************************************************************/
+/* context save */
+/****************************************************************************/
static const unsigned int register_ranges_a20x[] = {
REG_RB_SURFACE_INFO, REG_RB_DEPTH_INFO,
@@ -402,6 +518,9 @@
};
+/* save h/w regs, alu constants, texture contants, etc. ...
+* requires: bool_shadow_gpuaddr, loop_shadow_gpuaddr
+*/
static void build_regsave_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
@@ -412,6 +531,8 @@
*cmd++ = 0;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
+ /* Make sure the HW context has the correct register values
+ * before reading them. */
*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
*cmd++ = 0;
@@ -420,7 +541,7 @@
unsigned int reg_array_size = 0;
const unsigned int *ptr_register_ranges;
-
+ /* Based on chip id choose the register ranges */
if (adreno_is_a220(adreno_dev)) {
ptr_register_ranges = register_ranges_a220;
reg_array_size = ARRAY_SIZE(register_ranges_a220);
@@ -433,7 +554,7 @@
}
-
+ /* Write HW registers into shadow */
for (i = 0; i < (reg_array_size/2) ; i++) {
build_reg_to_mem_range(ptr_register_ranges[i*2],
ptr_register_ranges[i*2+1],
@@ -441,38 +562,53 @@
}
}
-
+ /* Copy ALU constants */
cmd =
reg_to_mem(cmd, (drawctxt->gpustate.gpuaddr) & 0xFFFFE000,
REG_SQ_CONSTANT_0, ALU_CONSTANTS);
-
+ /* Copy Tex constants */
cmd =
reg_to_mem(cmd,
(drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000,
REG_SQ_FETCH_0, TEX_CONSTANTS);
#else
+ /* Insert a wait for idle packet before reading the registers.
+ * This is to fix a hang/reset seen during stress testing. In this
+ * hang, CP encountered a timeout reading SQ's boolean constant
+ * register. There is logic in the HW that blocks reading of this
+ * register when the SQ block is not idle, which we believe is
+ * contributing to the hang.*/
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
+ /* H/w registers are already shadowed; just need to disable shadowing
+ * to prevent corruption.
+ */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
- *cmd++ = 4 << 16;
- *cmd++ = 0x0;
+ *cmd++ = 4 << 16; /* regs, start=0 */
+ *cmd++ = 0x0; /* count = 0 */
+ /* ALU constants are already shadowed; just need to disable shadowing
+ * to prevent corruption.
+ */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
- *cmd++ = 0 << 16;
- *cmd++ = 0x0;
+ *cmd++ = 0 << 16; /* ALU, start=0 */
+ *cmd++ = 0x0; /* count = 0 */
+ /* Tex constants are already shadowed; just need to disable shadowing
+ * to prevent corruption.
+ */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
- *cmd++ = 1 << 16;
- *cmd++ = 0x0;
+ *cmd++ = 1 << 16; /* Tex, start=0 */
+ *cmd++ = 0x0; /* count = 0 */
#endif
-
+ /* Need to handle some of the registers separately */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = REG_SQ_GPR_MANAGEMENT;
*cmd++ = tmp_ctx.reg_values[0];
@@ -493,33 +629,34 @@
}
}
-
+ /* Copy Boolean constants */
cmd = reg_to_mem(cmd, tmp_ctx.bool_shadow, REG_SQ_CF_BOOLEANS,
BOOL_CONSTANTS);
-
+ /* Copy Loop constants */
cmd = reg_to_mem(cmd, tmp_ctx.loop_shadow,
REG_SQ_CF_LOOP, LOOP_CONSTANTS);
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->reg_save, start, cmd);
tmp_ctx.cmd = cmd;
}
+/*copy colour, depth, & stencil buffers from graphics memory to system memory*/
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
struct gmem_shadow_t *shadow)
{
unsigned int *cmds = shadow->gmem_save_commands;
unsigned int *start = cmds;
-
+ /* Calculate the new offset based on the adjusted base */
unsigned int bytesperpixel = format2bytesperpixel[shadow->format];
unsigned int addr = shadow->gmemshadow.gpuaddr;
unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;
if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* Store TP0_CHICKEN register */
*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmds++ = REG_TP0_CHICKEN;
@@ -529,48 +666,48 @@
*cmds++ = 0;
}
-
+ /* Set TP0_CHICKEN to zero */
*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
*cmds++ = 0x00000000;
-
+ /* Set PA_SC_AA_CONFIG to 0 */
*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
*cmds++ = 0x00000000;
-
+ /* program shader */
-
+ /* load shader vtx constants ... 5 dwords */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
*cmds++ = (0x1 << 16) | SHADER_CONST_ADDR;
*cmds++ = 0;
-
+ /* valid(?) vtx constant flag & addr */
*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
-
+ /* limit = 12 dwords */
*cmds++ = 0x00000030;
-
+ /* Invalidate L2 cache to make sure vertices are updated */
*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
*cmds++ = 0x1;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
- *cmds++ = 0x00ffffff;
- *cmds++ = 0x0;
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00ffffff; /* REG_VGT_MAX_VTX_INDX */
+ *cmds++ = 0x0; /* REG_VGT_MIN_VTX_INDX */
+ *cmds++ = 0x00000000; /* REG_VGT_INDX_OFFSET */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
- *cmds++ = 0x0000ffff;
+ *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLORCONTROL);
*cmds++ = 0x00000c20;
-
+ /* Repartition shaders */
*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
*cmds++ = adreno_dev->pix_shader_start;
-
+ /* Invalidate Vertex & Pixel instruction code address and sizes */
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
*cmds++ = 0x00003F00;
@@ -578,14 +715,14 @@
*cmds++ = adreno_encode_istore_size(adreno_dev)
| adreno_dev->pix_shader_start;
-
+ /* load the patched vertex shader stream */
cmds = program_shader(cmds, 0, gmem2sys_vtx_pgm, GMEM2SYS_VTX_PGM_LEN);
-
+ /* Load the patched fragment shader stream */
cmds =
program_shader(cmds, 1, gmem2sys_frag_pgm, GMEM2SYS_FRAG_PGM_LEN);
-
+ /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
if (adreno_is_a22x(adreno_dev))
@@ -594,26 +731,29 @@
*cmds++ = 0x10010001;
*cmds++ = 0x00000008;
-
+ /* resolve */
-
+ /* PA_CL_VTE_CNTL */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
-
+ /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
*cmds++ = 0x00000b00;
-
+ /* program surface info */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
- *cmds++ = shadow->gmem_pitch;
+ *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */
-
+ /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
+ * Base=gmem_base
+ */
+ /* gmem base assumed 4K aligned. */
BUG_ON(tmp_ctx.gmem_base & 0xFFF);
*cmds++ =
(shadow->
format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;
-
+ /* disable Z */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
if (adreno_is_a22x(adreno_dev))
@@ -621,10 +761,17 @@
else
*cmds++ = 0;
+ /* set REG_PA_SU_SC_MODE_CNTL
+ * Front_ptype = draw triangles
+ * Back_ptype = draw triangles
+ * Provoking vertex = last
+ */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
*cmds++ = 0x00080240;
+ /* Use maximum scissor values -- quad vertices already have the
+ * correct bounds */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
*cmds++ = (0 << 16) | 0;
@@ -634,14 +781,17 @@
*cmds++ = (unsigned int)((1U << 31) | (0 << 16) | 0);
*cmds++ = (0x1fff << 16) | (0x1fff);
+ /* load the viewport so that z scale = clear depth and
+ * z offset = 0.0f
+ */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
- *cmds++ = 0xbf800000;
+ *cmds++ = 0xbf800000; /* -1.0f */
*cmds++ = 0x0;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLOR_MASK);
- *cmds++ = 0x0000000f;
+ *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
@@ -652,23 +802,29 @@
*cmds++ = 0x00000000;
*cmds++ = 0x00000000;
+ /* load the stencil ref value
+ * $AAM - do this later
+ */
-
+ /* load the COPY state */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
*cmds++ = CP_REG(REG_RB_COPY_CONTROL);
- *cmds++ = 0;
- *cmds++ = addr & 0xfffff000;
- *cmds++ = shadow->pitch >> 5;
+ *cmds++ = 0; /* RB_COPY_CONTROL */
+ *cmds++ = addr & 0xfffff000; /* RB_COPY_DEST_BASE */
+ *cmds++ = shadow->pitch >> 5; /* RB_COPY_DEST_PITCH */
+ /* Endian=none, Linear, Format=RGBA8888,Swap=0,!Dither,
+ * MaskWrite:R=G=B=A=1
+ */
*cmds++ = 0x0003c008 |
(shadow->format << RB_COPY_DEST_INFO__COPY_DEST_FORMAT__SHIFT);
-
+ /* Make sure we stay in offsetx field. */
BUG_ON(offset & 0xfffff000);
*cmds++ = offset;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_MODECONTROL);
- *cmds++ = 0x6;
+ *cmds++ = 0x6; /* EDRAM copy */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_CL_CLIP_CNTL);
@@ -680,25 +836,27 @@
*cmds++ = 0x0000000;
*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
- *cmds++ = 0;
-
+ *cmds++ = 0; /* viz query info. */
+ /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
*cmds++ = 0x00004088;
- *cmds++ = 3;
+ *cmds++ = 3; /* NumIndices=3 */
} else {
-
+ /* queue the draw packet */
*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
- *cmds++ = 0;
-
+ *cmds++ = 0; /* viz query info. */
+ /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
*cmds++ = 0x00030088;
}
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, shadow->gmem_save, start, cmds);
return cmds;
}
+/* context restore */
+/*copy colour, depth, & stencil buffers from system memory to graphics memory*/
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
struct gmem_shadow_t *shadow)
@@ -707,7 +865,7 @@
unsigned int *start = cmds;
if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* Store TP0_CHICKEN register */
*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmds++ = REG_TP0_CHICKEN;
*cmds++ = tmp_ctx.chicken_restore;
@@ -716,53 +874,53 @@
*cmds++ = 0;
}
-
+ /* Set TP0_CHICKEN to zero */
*cmds++ = cp_type0_packet(REG_TP0_CHICKEN, 1);
*cmds++ = 0x00000000;
-
+ /* Set PA_SC_AA_CONFIG to 0 */
*cmds++ = cp_type0_packet(REG_PA_SC_AA_CONFIG, 1);
*cmds++ = 0x00000000;
-
+ /* shader constants */
-
+ /* vertex buffer constants */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
*cmds++ = (0x1 << 16) | (9 * 6);
-
+ /* valid(?) vtx constant flag & addr */
*cmds++ = shadow->quad_vertices.gpuaddr | 0x3;
-
+ /* limit = 12 dwords */
*cmds++ = 0x00000030;
-
+ /* valid(?) vtx constant flag & addr */
*cmds++ = shadow->quad_texcoords.gpuaddr | 0x3;
-
+ /* limit = 8 dwords */
*cmds++ = 0x00000020;
*cmds++ = 0;
*cmds++ = 0;
-
+ /* Invalidate L2 cache to make sure vertices are updated */
*cmds++ = cp_type0_packet(REG_TC_CNTL_STATUS, 1);
*cmds++ = 0x1;
cmds = program_shader(cmds, 0, sys2gmem_vtx_pgm, SYS2GMEM_VTX_PGM_LEN);
-
+ /* Repartition shaders */
*cmds++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
*cmds++ = adreno_dev->pix_shader_start;
-
+ /* Invalidate Vertex & Pixel instruction code address and sizes */
*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
- *cmds++ = 0x00000300;
+ *cmds++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */
*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
*cmds++ = adreno_encode_istore_size(adreno_dev)
| adreno_dev->pix_shader_start;
-
+ /* Load the patched fragment shader stream */
cmds =
program_shader(cmds, 1, sys2gmem_frag_pgm, SYS2GMEM_FRAG_PGM_LEN);
-
+ /* SQ_PROGRAM_CNTL / SQ_CONTEXT_MISC */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_SQ_PROGRAM_CNTL);
*cmds++ = 0x10030002;
@@ -770,44 +928,49 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SC_AA_MASK);
- *cmds++ = 0x0000ffff;
+ *cmds++ = 0x0000ffff; /* REG_PA_SC_AA_MASK */
if (!adreno_is_a22x(adreno_dev)) {
-
+ /* PA_SC_VIZ_QUERY */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SC_VIZ_QUERY);
- *cmds++ = 0x0;
+ *cmds++ = 0x0; /*REG_PA_SC_VIZ_QUERY */
}
-
+ /* RB_COLORCONTROL */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLORCONTROL);
*cmds++ = 0x00000c20;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 4);
*cmds++ = CP_REG(REG_VGT_MAX_VTX_INDX);
- *cmds++ = 0x00ffffff;
- *cmds++ = 0x0;
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00ffffff; /* mmVGT_MAX_VTX_INDX */
+ *cmds++ = 0x0; /* mmVGT_MIN_VTX_INDX */
+ *cmds++ = 0x00000000; /* mmVGT_INDX_OFFSET */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_VGT_VERTEX_REUSE_BLOCK_CNTL);
- *cmds++ = 0x00000002;
- *cmds++ = 0x00000002;
+ *cmds++ = 0x00000002; /* mmVGT_VERTEX_REUSE_BLOCK_CNTL */
+ *cmds++ = 0x00000002; /* mmVGT_OUT_DEALLOC_CNTL */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_SQ_INTERPOLATOR_CNTL);
- *cmds++ = 0xffffffff;
+ *cmds++ = 0xffffffff; /* mmSQ_INTERPOLATOR_CNTL */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SC_AA_CONFIG);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* REG_PA_SC_AA_CONFIG */
+ /* set REG_PA_SU_SC_MODE_CNTL
+ * Front_ptype = draw triangles
+ * Back_ptype = draw triangles
+ * Provoking vertex = last
+ */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_SU_SC_MODE_CNTL);
*cmds++ = 0x00080240;
-
+ /* texture constants */
*cmds++ =
cp_type3_packet(CP_SET_CONSTANT, (SYS2GMEM_TEX_CONST_LEN + 1));
*cmds++ = (0x1 << 16) | (0 * 6);
@@ -818,24 +981,29 @@
cmds[2] |= (shadow->width - 1) | (shadow->height - 1) << 13;
cmds += SYS2GMEM_TEX_CONST_LEN;
-
+ /* program surface info */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_RB_SURFACE_INFO);
- *cmds++ = shadow->gmem_pitch;
+ *cmds++ = shadow->gmem_pitch; /* pitch, MSAA = 1 */
+ /* RB_COLOR_INFO Endian=none, Linear, Format=RGBA8888, Swap=0,
+ * Base=gmem_base
+ */
*cmds++ =
(shadow->
format << RB_COLOR_INFO__COLOR_FORMAT__SHIFT) | tmp_ctx.gmem_base;
-
+ /* RB_DEPTHCONTROL */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_DEPTHCONTROL);
if (adreno_is_a22x(adreno_dev))
- *cmds++ = 8;
+ *cmds++ = 8; /* disable Z */
else
- *cmds++ = 0;
+ *cmds++ = 0; /* disable Z */
+ /* Use maximum scissor values -- quad vertices already
+ * have the correct bounds */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_PA_SC_SCREEN_SCISSOR_TL);
*cmds++ = (0 << 16) | 0;
@@ -847,10 +1015,10 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_PA_CL_VTE_CNTL);
-
+ /* disable X/Y/Z transforms, X/Y/Z are premultiplied by W */
*cmds++ = 0x00000b00;
-
+ /*load the viewport so that z scale = clear depth and z offset = 0.0f */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(REG_PA_CL_VPORT_ZSCALE);
*cmds++ = 0xbf800000;
@@ -858,7 +1026,7 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLOR_MASK);
- *cmds++ = 0x0000000f;
+ *cmds++ = 0x0000000f; /* R = G = B = 1:enabled */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_COLOR_DEST_MASK);
@@ -869,9 +1037,12 @@
*cmds++ = 0x00000000;
*cmds++ = 0x00000000;
+ /* load the stencil ref value
+ * $AAM - do this later
+ */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(REG_RB_MODECONTROL);
-
+ /* draw pixels with color and depth/stencil component */
*cmds++ = 0x4;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
@@ -884,19 +1055,19 @@
*cmds++ = 0x0000000;
*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
- *cmds++ = 0;
-
+ *cmds++ = 0; /* viz query info. */
+ /* PrimType=RectList, SrcSel=AutoIndex, VisCullMode=Ignore*/
*cmds++ = 0x00004088;
- *cmds++ = 3;
+ *cmds++ = 3; /* NumIndices=3 */
} else {
-
+ /* queue the draw packet */
*cmds++ = cp_type3_packet(CP_DRAW_INDX, 2);
- *cmds++ = 0;
-
+ *cmds++ = 0; /* viz query info. */
+ /* PrimType=RectList, NumIndices=3, SrcSel=AutoIndex */
*cmds++ = 0x00030088;
}
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
return cmds;
@@ -915,17 +1086,17 @@
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
-
-
+ /* H/W Registers */
+ /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
cmd++;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-
+ /* Force mismatch */
*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
#endif
-
+ /* Based on chip id choose the register ranges */
if (adreno_is_a220(adreno_dev)) {
ptr_register_ranges = register_ranges_a220;
reg_array_size = ARRAY_SIZE(register_ranges_a220);
@@ -943,16 +1114,19 @@
ptr_register_ranges[i*2+1]);
}
+ /* Now that we know how many register blocks we have, we can compute
+ * the command length
+ */
start[2] =
cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, (cmd - start) - 3);
-
+ /* Enable shadowing for the entire register block. */
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
- start[4] |= (0 << 24) | (4 << 16);
+ start[4] |= (0 << 24) | (4 << 16); /* Disable shadowing. */
#else
start[4] |= (1 << 24) | (4 << 16);
#endif
-
+ /* Need to handle some of the registers separately */
*cmd++ = cp_type0_packet(REG_SQ_GPR_MANAGEMENT, 1);
tmp_ctx.reg_values[0] = virt2gpu(cmd, &drawctxt->gpustate);
*cmd++ = 0x00040400;
@@ -976,42 +1150,48 @@
}
}
-
+ /* ALU Constants */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = drawctxt->gpustate.gpuaddr & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
- *cmd++ = (0 << 24) | (0 << 16) | 0;
+ *cmd++ = (0 << 24) | (0 << 16) | 0; /* Disable shadowing */
#else
*cmd++ = (1 << 24) | (0 << 16) | 0;
#endif
*cmd++ = ALU_CONSTANTS;
-
+ /* Texture Constants */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = (drawctxt->gpustate.gpuaddr + TEX_OFFSET) & 0xFFFFE000;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-
+ /* Disable shadowing */
*cmd++ = (0 << 24) | (1 << 16) | 0;
#else
*cmd++ = (1 << 24) | (1 << 16) | 0;
#endif
*cmd++ = TEX_CONSTANTS;
-
+ /* Boolean Constants */
*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + BOOL_CONSTANTS);
*cmd++ = (2 << 16) | 0;
+ /* the next BOOL_CONSTANTS dwords are the shadow area for
+ * boolean constants.
+ */
tmp_ctx.bool_shadow = virt2gpu(cmd, &drawctxt->gpustate);
cmd += BOOL_CONSTANTS;
-
+ /* Loop Constants */
*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 1 + LOOP_CONSTANTS);
*cmd++ = (3 << 16) | 0;
+ /* the next LOOP_CONSTANTS dwords are the shadow area for
+ * loop constants.
+ */
tmp_ctx.loop_shadow = virt2gpu(cmd, &drawctxt->gpustate);
cmd += LOOP_CONSTANTS;
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->reg_restore, start, cmd);
tmp_ctx.cmd = cmd;
@@ -1027,45 +1207,45 @@
unsigned int *partition1;
unsigned int *shaderBases, *partition2;
-
+ /* compute vertex, pixel and shared instruction shadow GPU addresses */
tmp_ctx.shader_vertex = drawctxt->gpustate.gpuaddr + SHADER_OFFSET;
tmp_ctx.shader_pixel = tmp_ctx.shader_vertex
+ _shader_shadow_size(adreno_dev);
tmp_ctx.shader_shared = tmp_ctx.shader_pixel
+ _shader_shadow_size(adreno_dev);
-
+ /* restore shader partitioning and instructions */
- restore = cmd;
+ restore = cmd; /* start address */
-
+ /* Invalidate Vertex & Pixel instruction code address and sizes */
*cmd++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
- *cmd++ = 0x00000300;
+ *cmd++ = 0x00000300; /* 0x100 = Vertex, 0x200 = Pixel */
-
+ /* Restore previous shader vertex & pixel instruction bases. */
*cmd++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
- shaderBases = cmd++;
+ shaderBases = cmd++; /* TBD #5: shader bases (from fixup) */
-
+ /* write the shader partition information to a scratch register */
*cmd++ = cp_type0_packet(REG_SQ_INST_STORE_MANAGMENT, 1);
- partition1 = cmd++;
+ partition1 = cmd++; /* TBD #4a: partition info (from save) */
-
+ /* load vertex shader instructions from the shadow. */
*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
- *cmd++ = tmp_ctx.shader_vertex + 0x0;
- startSizeVtx = cmd++;
+ *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */
+ startSizeVtx = cmd++; /* TBD #1: start/size (from save) */
-
+ /* load pixel shader instructions from the shadow. */
*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
- *cmd++ = tmp_ctx.shader_pixel + 0x1;
- startSizePix = cmd++;
+ *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */
+ startSizePix = cmd++; /* TBD #2: start/size (from save) */
-
+ /* load shared shader instructions from the shadow. */
*cmd++ = cp_type3_packet(CP_IM_LOAD, 2);
- *cmd++ = tmp_ctx.shader_shared + 0x2;
- startSizeShared = cmd++;
+ *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */
+ startSizeShared = cmd++; /* TBD #3: start/size (from save) */
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->shader_restore, restore, cmd);
/*
@@ -1077,81 +1257,86 @@
* have been written.
*/
- fixup = cmd;
+ fixup = cmd; /* start address */
-
+ /* write the shader partition information to a scratch register */
*cmd++ = cp_type0_packet(REG_SCRATCH_REG2, 1);
- partition2 = cmd++;
+ partition2 = cmd++; /* TBD #4b: partition info (from save) */
-
+ /* mask off unused bits, then OR with shader instruction memory size */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = REG_SCRATCH_REG2;
-
+ /* AND off invalid bits. */
*cmd++ = 0x0FFF0FFF;
-
+ /* OR in instruction memory size. */
*cmd++ = adreno_encode_istore_size(adreno_dev);
-
+ /* write the computed value to the SET_SHADER_BASES data field */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = REG_SCRATCH_REG2;
-
+ /* TBD #5: shader bases (to restore) */
*cmd++ = virt2gpu(shaderBases, &drawctxt->gpustate);
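For reference, the value those packets leave in the SET_SHADER_BASES data dword is what this host-side helper would compute (a minimal sketch; the helper name is illustrative and does not exist in the driver):

/* Host-side view of the GPU fixup above (illustrative sketch only). */
static unsigned int fixup_shader_bases(unsigned int saved_partition,
				       unsigned int istore_size_bits)
{
	/* keep the valid partition fields, drop the rest */
	unsigned int bases = saved_partition & 0x0FFF0FFF;

	/* OR in the encoded instruction store size, as CP_REG_RMW does
	 * before CP_REG_TO_MEM patches the CP_SET_SHADER_BASES data dword
	 */
	return bases | istore_size_bits;
}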
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->shader_fixup, fixup, cmd);
-
+ /* save shader partitioning and instructions */
- save = cmd;
+ save = cmd; /* start address */
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
+ /* fetch the SQ_INST_STORE_MANAGMENT register value,
+ * store the value in the data fields of the SET_CONSTANT commands
+ * above.
+ */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
-
+ /* TBD #4a: partition info (to restore) */
*cmd++ = virt2gpu(partition1, &drawctxt->gpustate);
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = REG_SQ_INST_STORE_MANAGMENT;
-
+ /* TBD #4b: partition info (to fixup) */
*cmd++ = virt2gpu(partition2, &drawctxt->gpustate);
-
+ /* store the vertex shader instructions */
*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
- *cmd++ = tmp_ctx.shader_vertex + 0x0;
-
+ *cmd++ = tmp_ctx.shader_vertex + 0x0; /* 0x0 = Vertex */
+ /* TBD #1: start/size (to restore) */
*cmd++ = virt2gpu(startSizeVtx, &drawctxt->gpustate);
-
+ /* store the pixel shader instructions */
*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
- *cmd++ = tmp_ctx.shader_pixel + 0x1;
-
+ *cmd++ = tmp_ctx.shader_pixel + 0x1; /* 0x1 = Pixel */
+ /* TBD #2: start/size (to restore) */
*cmd++ = virt2gpu(startSizePix, &drawctxt->gpustate);
-
+ /* store the shared shader instructions if vertex base is nonzero */
*cmd++ = cp_type3_packet(CP_IM_STORE, 2);
- *cmd++ = tmp_ctx.shader_shared + 0x2;
-
+ *cmd++ = tmp_ctx.shader_shared + 0x2; /* 0x2 = Shared */
+ /* TBD #3: start/size (to restore) */
*cmd++ = virt2gpu(startSizeShared, &drawctxt->gpustate);
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
-
+ /* create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->shader_save, save, cmd);
tmp_ctx.cmd = cmd;
}
+/* create buffers for saving/restoring registers, constants, & GMEM */
static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
-
+ /* build indirect command buffers to save & restore regs/constants */
build_regrestore_cmds(adreno_dev, drawctxt);
build_regsave_cmds(adreno_dev, drawctxt);
@@ -1160,6 +1345,7 @@
return 0;
}
+/* create buffers for saving/restoring registers, constants, & GMEM */
static int a2xx_create_gmem_shadow(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
@@ -1174,22 +1360,22 @@
if (result)
return result;
-
+ /* set the gmem shadow flag for the context */
drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
-
+ /* blank out gmem shadow. */
kgsl_sharedmem_set(&drawctxt->context_gmem_shadow.gmemshadow, 0, 0,
drawctxt->context_gmem_shadow.size);
-
+ /* build quad vertex buffer */
build_quad_vtxbuff(drawctxt, &drawctxt->context_gmem_shadow,
&tmp_ctx.cmd);
-
+ /* build TP0_CHICKEN register restore command buffer */
if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE))
tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
-
+ /* build indirect command buffers to save & restore gmem */
drawctxt->context_gmem_shadow.gmem_save_commands = tmp_ctx.cmd;
tmp_ctx.cmd =
build_gmem2sys_cmds(adreno_dev, drawctxt,
@@ -1215,6 +1401,12 @@
{
int ret;
+ /*
+ * Allocate memory for the GPU state and the context commands.
+ * Despite the name, this is much more than just storage for
+ * the gpustate; it also contains command space for the gmem save
+ * and texture and vertex buffer storage.
+ */
ret = kgsl_allocate(&drawctxt->gpustate,
drawctxt->pagetable, _context_size(adreno_dev));
@@ -1242,7 +1434,7 @@
goto done;
}
-
+ /* Flush and sync the gpustate memory */
kgsl_cache_range_op(&drawctxt->gpustate,
KGSL_CACHE_OP_FLUSH);
@@ -1267,11 +1459,23 @@
if (adreno_is_a225(adreno_dev)) {
adreno_dev->gpudev->ctx_switches_since_last_draw++;
+ /* If there have been more than
+ * ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW context
+ * switches w/o gmem being saved, then we need to execute
+ * this workaround */
if (adreno_dev->gpudev->ctx_switches_since_last_draw >
ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW)
adreno_dev->gpudev->ctx_switches_since_last_draw = 0;
else
return;
+ /*
+ * Issue an empty draw call to avoid possible hangs due to
+ * repeated idles without intervening draw calls.
+ * On adreno 225 the PC block has a cache that is only
+ * flushed on draw calls and repeated idles can make it
+ * overflow. The gmem save path contains draw calls so
+ * this workaround isn't needed there.
+ */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = (0x4 << 16) | (REG_PA_SU_SC_MODE_CNTL - 0x2000);
*cmds++ = 0;
@@ -1284,6 +1488,11 @@
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
} else {
+ /* On Adreno 20x/220, if the events for shader space reuse
+ * get dropped, the CP block would wait indefinitely.
+ * Sending CP_SET_SHADER_BASES packet unblocks the CP from
+ * this wait.
+ */
*cmds++ = cp_type3_packet(CP_SET_SHADER_BASES, 1);
*cmds++ = adreno_encode_istore_size(adreno_dev)
| adreno_dev->pix_shader_start;
@@ -1298,7 +1507,7 @@
{
struct kgsl_device *device = &adreno_dev->dev;
- if (context == NULL)
+ if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
return;
if (context->flags & CTXT_FLAGS_GPU_HANG)
@@ -1307,17 +1516,21 @@
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* save registers and constants. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->reg_save, 3);
if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
-
+ /* save shader partitioning and instructions. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->shader_save, 3);
+ /*
+ * fixup shader partitioning parameter for
+ * SET_SHADER_BASES.
+ */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->shader_fixup, 3);
@@ -1328,11 +1541,14 @@
if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
(context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+ /* save gmem.
+ * (note: changes shader. shader must already be saved.)
+ */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.gmem_save, 3);
-
+ /* Restore TP0_CHICKEN */
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
@@ -1352,7 +1568,7 @@
unsigned int cmds[5];
if (context == NULL) {
-
+ /* No context - set the default pagetable and that's it */
kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
adreno_dev->drawctxt_active->id);
return;
@@ -1376,13 +1592,16 @@
REG_SHADOW_SIZE + CMD_BUFFER_SIZE + TEX_SHADOW_SIZE, false);
#endif
+ /* restore gmem.
+ * (note: changes shader. shader must not already be restored.)
+ */
if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
context->context_gmem_shadow.gmem_restore, 3);
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* Restore TP0_CHICKEN */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->chicken_restore, 3);
@@ -1393,11 +1612,11 @@
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* restore registers and constants. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);
-
+ /* restore shader instructions & partitioning. */
if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
@@ -1413,6 +1632,16 @@
}
}
+/*
+ * Interrupt management
+ *
+ * a2xx interrupt control is distributed among the various
+ * hardware components (RB, CP, MMU). The main interrupt
+ * tells us which component fired the interrupt, but one needs
+ * to go to the individual component to find out why. The
+ * following functions provide the broken out support for
+ * managing the interrupts
+ */
#define RBBM_INT_MASK RBBM_INT_CNTL__RDERR_INT_MASK
@@ -1467,6 +1696,11 @@
if (!status) {
if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) {
+ /* This indicates that we could not read CP_INT_STAT.
+ * As a precaution just wake up processes so
+ * they can check their timestamps. Since we
+ * did not ack any interrupts, this interrupt will
+ * be generated again */
KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n");
wake_up_interruptible_all(&device->wait_queue);
} else
@@ -1474,32 +1708,21 @@
return;
}
- if (status & CP_INT_CNTL__RB_INT_MASK) {
-
- unsigned int context_id;
- kgsl_sharedmem_readl(&device->memstore,
- &context_id,
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- current_context));
- if (context_id < KGSL_MEMSTORE_MAX) {
- kgsl_sharedmem_writel(&rb->device->memstore,
- KGSL_MEMSTORE_OFFSET(context_id,
- ts_cmp_enable), 0);
- wmb();
- }
- KGSL_CMD_WARN(rb->device, "ringbuffer rb interrupt\n");
- }
-
for (i = 0; i < ARRAY_SIZE(kgsl_cp_error_irqs); i++) {
if (status & kgsl_cp_error_irqs[i].mask) {
KGSL_CMD_CRIT(rb->device, "%s\n",
kgsl_cp_error_irqs[i].message);
+ /*
+ * on fatal errors, turn off the interrupts to
+ * avoid storming. This has the side effect of
+ * forcing a PM dump when the timestamp times out
+ */
kgsl_pwrctrl_irq(rb->device, KGSL_PWRFLAGS_OFF);
}
}
-
+ /* only ack bits we understand */
status &= CP_INT_MASK;
adreno_regwrite(device, REG_CP_INT_ACK, status);
@@ -1526,9 +1749,14 @@
adreno_regread(device, REG_RBBM_READ_ERROR, &rderr);
source = (rderr & RBBM_READ_ERROR_REQUESTER)
? "host" : "cp";
-
+ /* convert to dword address */
addr = (rderr & RBBM_READ_ERROR_ADDRESS_MASK) >> 2;
+ /*
+ * Log CP_INT_STATUS interrupts from the CP at a
+ * lower level because they can happen frequently
+ * and are worked around in a2xx_irq_handler.
+ */
if (addr == REG_CP_INT_STATUS &&
rderr & RBBM_READ_ERROR_ERROR &&
rderr & RBBM_READ_ERROR_REQUESTER)
@@ -1586,25 +1814,38 @@
adreno_regwrite(device, MH_INTERRUPT_MASK, 0);
}
-
+ /* Force the writes to post before touching the IRQ line */
wmb();
}
+static unsigned int a2xx_irq_pending(struct adreno_device *adreno_dev)
+{
+ struct kgsl_device *device = &adreno_dev->dev;
+ unsigned int rbbm, cp, mh;
+
+ adreno_regread(device, REG_RBBM_INT_CNTL, &rbbm);
+ adreno_regread(device, REG_CP_INT_CNTL, &cp);
+ adreno_regread(device, MH_INTERRUPT_MASK, &mh);
+
+ return ((rbbm & RBBM_INT_MASK) || (cp & CP_INT_MASK) ||
+ (mh & kgsl_mmu_get_int_mask())) ? 1 : 0;
+}
+
static void a2xx_rb_init(struct adreno_device *adreno_dev,
struct adreno_ringbuffer *rb)
{
unsigned int *cmds, cmds_gpu;
-
- cmds = adreno_ringbuffer_allocspace(rb, 19);
+ /* ME_INIT */
+ cmds = adreno_ringbuffer_allocspace(rb, NULL, 19);
cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint)*(rb->wptr-19);
GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 18));
-
+ /* All fields present (bits 9:0) */
GSL_RB_WRITE(cmds, cmds_gpu, 0x000003ff);
-
+ /* Disable/Enable Real-Time Stream processing (present but ignored) */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
-
+ /* Enable (2D <-> 3D) implicit synchronization (present but ignored) */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
GSL_RB_WRITE(cmds, cmds_gpu,
@@ -1624,23 +1865,28 @@
GSL_RB_WRITE(cmds, cmds_gpu,
SUBBLOCK_OFFSET(REG_PA_SU_POLY_OFFSET_FRONT_SCALE));
-
+ /* Instruction memory size: */
GSL_RB_WRITE(cmds, cmds_gpu,
(adreno_encode_istore_size(adreno_dev)
| adreno_dev->pix_shader_start));
-
+ /* Maximum Contexts */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
+ /* Write Confirm Interval: the CP will wait
+ * wait_interval * 16 clocks between polling */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
-
+ /* NQ and External Memory Swap */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
+ /* Protected mode error checking
+ * If iommu is used then protection needs to be turned off
+ * to enable context bank switching */
if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
GSL_RB_WRITE(cmds, cmds_gpu, 0);
else
GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL);
-
+ /* Disable header dumping and Header dump address */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
-
+ /* Header dump size */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
adreno_ringbuffer_submit(rb);
@@ -1651,18 +1897,18 @@
struct kgsl_device *device = &adreno_dev->dev;
unsigned int reg, val;
-
+ /* Freeze the counter */
adreno_regwrite(device, REG_CP_PERFMON_CNTL,
REG_PERF_MODE_CNT | REG_PERF_STATE_FREEZE);
-
+ /* Get the value */
adreno_regread(device, REG_RBBM_PERFCOUNTER1_LO, &val);
-
+ /* Reset the counter */
adreno_regwrite(device, REG_CP_PERFMON_CNTL,
REG_PERF_MODE_CNT | REG_PERF_STATE_RESET);
-
+ /* Re-Enable the performance monitors */
adreno_regread(device, REG_RBBM_PM_OVERRIDE2, &reg);
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, (reg | 0x40));
adreno_regwrite(device, REG_RBBM_PERFCOUNTER1_SELECT, 0x1);
@@ -1679,7 +1925,7 @@
unsigned int gmem_size;
unsigned int edram_value = 0;
-
+ /* get edram_size value equivalent */
gmem_size = (adreno_dev->gmem_size >> 14);
while (gmem_size >>= 1)
edram_value++;
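The loop is effectively a log2 of the GMEM size in 16 KB units; the same computation as a standalone sketch, with an example value:

/* Sketch: edram_value = log2(gmem_size / 16KB), e.g. 256 KB -> 16 units -> 4 */
static unsigned int gmem_to_edram_value(unsigned int gmem_size_bytes)
{
	unsigned int units = gmem_size_bytes >> 14;	/* 16 KB units */
	unsigned int value = 0;

	while (units >>= 1)
		value++;
	return value;
}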
@@ -1687,9 +1933,9 @@
rb_edram_info.val = 0;
rb_edram_info.f.edram_size = edram_value;
- rb_edram_info.f.edram_mapping_mode = 0;
+ rb_edram_info.f.edram_mapping_mode = 0; /* EDRAM_MAP_UPPER */
-
+ /* must be aligned to size */
rb_edram_info.f.edram_range = (adreno_dev->gmem_base >> 14);
adreno_regwrite(device, REG_RB_EDRAM_INFO, rb_edram_info.val);
@@ -1699,9 +1945,18 @@
{
struct kgsl_device *device = &adreno_dev->dev;
+ /*
+ * We need to make sure all blocks are powered up and clocked
+ * before issuing a soft reset. The overrides will then be
+ * turned off (set to 0)
+ */
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE1, 0xfffffffe);
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xffffffff);
+ /*
+ * Only reset CP block if all blocks have previously been
+ * reset
+ */
if (!(device->flags & KGSL_FLAGS_SOFT_RESET) ||
!adreno_is_a22x(adreno_dev)) {
adreno_regwrite(device, REG_RBBM_SOFT_RESET,
@@ -1711,17 +1966,24 @@
adreno_regwrite(device, REG_RBBM_SOFT_RESET,
0x00000001);
}
+ /*
+ * The core is in an indeterminate state until the reset
+ * completes after 30ms.
+ */
msleep(30);
adreno_regwrite(device, REG_RBBM_SOFT_RESET, 0x00000000);
if (adreno_is_a225(adreno_dev)) {
-
+ /* Enable large instruction store for A225 */
adreno_regwrite(device, REG_SQ_FLOW_CONTROL,
0x18000000);
}
- if (adreno_is_a203(adreno_dev))
+ if (adreno_is_a20x(adreno_dev))
+ /* For A20X based targets increase number of clocks
+ * that RBBM will wait before de-asserting Register
+ * Clock Active signal */
adreno_regwrite(device, REG_RBBM_CNTL, 0x0000FFFF);
else
adreno_regwrite(device, REG_RBBM_CNTL, 0x00004442);
@@ -1741,7 +2003,7 @@
adreno_regwrite(device, REG_RBBM_DEBUG, 0x00080000);
-
+ /* Make sure interrupts are disabled */
adreno_regwrite(device, REG_RBBM_INT_CNTL, 0);
adreno_regwrite(device, REG_CP_INT_CNTL, 0);
adreno_regwrite(device, REG_SQ_INT_CNTL, 0);
@@ -1749,6 +2011,7 @@
a2xx_gmeminit(adreno_dev);
}
+/* Defined in adreno_a2xx_snapshot.c */
void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang);
@@ -1763,6 +2026,7 @@
.ctxt_draw_workaround = a2xx_drawctxt_draw_workaround,
.irq_handler = a2xx_irq_handler,
.irq_control = a2xx_irq_control,
+ .irq_pending = a2xx_irq_pending,
.snapshot = a2xx_snapshot,
.rb_init = a2xx_rb_init,
.busy_cycles = a2xx_busy_cycles,
diff --git a/drivers/gpu/msm/adreno_a2xx_snapshot.c b/drivers/gpu/msm/adreno_a2xx_snapshot.c
index e1cf325..75795b1 100644
--- a/drivers/gpu/msm/adreno_a2xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a2xx_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,6 +18,7 @@
#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
+ sizeof(struct kgsl_snapshot_debug))
+/* Dump the SX debug registers into a GPU snapshot debug section */
#define SXDEBUG_COUNT 0x1B
@@ -73,6 +74,11 @@
return DEBUG_SECTION_SZ(CPDEBUG_COUNT);
}
+/*
+ * The contents of the SQ debug sections are dword pairs:
+ * [register offset]:[value]
+ * This macro writes both dwords for the given register
+ */
#define SQ_DEBUG_WRITE(_device, _reg, _data, _offset) \
do { _data[(_offset)++] = (_reg); \
@@ -218,15 +224,19 @@
return DEBUG_SECTION_SZ(MIUDEBUG_COUNT);
}
+/* A2XX GPU snapshot function - this is where all of the A2XX specific
+ * bits and pieces are grabbed into the snapshot memory
+ */
void *a2xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang)
{
struct kgsl_device *device = &adreno_dev->dev;
+ struct kgsl_snapshot_registers_list list;
struct kgsl_snapshot_registers regs;
unsigned int pmoverride;
-
+ /* Choose the register set to dump */
if (adreno_is_a20x(adreno_dev)) {
regs.regs = (unsigned int *) a200_registers;
@@ -239,85 +249,96 @@
regs.count = a225_registers_count;
}
-
+ list.registers = &regs;
+ list.count = 1;
+
+ /* Master set of (non debug) registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_REGS, snapshot, remain,
- kgsl_snapshot_dump_regs, &regs);
+ kgsl_snapshot_dump_regs, &list);
-
+ /* CP_STATE_DEBUG indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_CP_STATE_DEBUG_INDEX,
REG_CP_STATE_DEBUG_DATA, 0x0, 0x14);
-
+ /* CP_ME indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_CP_ME_CNTL, REG_CP_ME_STATUS,
64, 44);
+ /*
+ * Need to temporarily turn off clock gating for the debug bus to
+ * work
+ */
adreno_regread(device, REG_RBBM_PM_OVERRIDE2, &pmoverride);
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, 0xFF);
-
+ /* SX debug registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a2xx_snapshot_sxdebug, NULL);
-
+ /* SU debug indexed registers (only for < 470) */
if (!adreno_is_a22x(adreno_dev))
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_PA_SU_DEBUG_CNTL,
REG_PA_SU_DEBUG_DATA,
0, 0x1B);
-
+ /* CP debug registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a2xx_snapshot_cpdebug, NULL);
-
+ /* MH debug indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, MH_DEBUG_CTRL, MH_DEBUG_DATA, 0x0, 0x40);
-
+ /* Leia only register sets */
if (adreno_is_a22x(adreno_dev)) {
-
+ /* RB DEBUG indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_RB_DEBUG_CNTL, REG_RB_DEBUG_DATA, 0, 8);
-
+ /* RB DEBUG indexed registers bank 2 */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_RB_DEBUG_CNTL, REG_RB_DEBUG_DATA + 0x1000,
0, 8);
-
+ /* PC_DEBUG indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_PC_DEBUG_CNTL, REG_PC_DEBUG_DATA, 0, 8);
-
+ /* GRAS_DEBUG indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_GRAS_DEBUG_CNTL, REG_GRAS_DEBUG_DATA, 0, 4);
-
+ /* MIU debug registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a2xx_snapshot_miudebug, NULL);
-
+ /* SQ debug registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a2xx_snapshot_sqdebug, NULL);
+ /*
+ * Reading SQ THREAD causes bad things to happen on a running
+ * system, so only read it if the GPU is already hung
+ */
if (hang) {
-
+ /* SQ THREAD debug registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a2xx_snapshot_sqthreaddebug, NULL);
}
}
-
+ /* Reset the clock gating */
adreno_regwrite(device, REG_RBBM_PM_OVERRIDE2, pmoverride);
return snapshot;
diff --git a/drivers/gpu/msm/adreno_a2xx_trace.c b/drivers/gpu/msm/adreno_a2xx_trace.c
index b398c74..87c930b 100644
--- a/drivers/gpu/msm/adreno_a2xx_trace.c
+++ b/drivers/gpu/msm/adreno_a2xx_trace.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -14,5 +14,6 @@
#include "kgsl.h"
#include "adreno.h"
+/* Instantiate tracepoints */
#define CREATE_TRACE_POINTS
#include "adreno_a2xx_trace.h"
diff --git a/drivers/gpu/msm/adreno_a2xx_trace.h b/drivers/gpu/msm/adreno_a2xx_trace.h
index b4fb47d..af355d6 100644
--- a/drivers/gpu/msm/adreno_a2xx_trace.h
+++ b/drivers/gpu/msm/adreno_a2xx_trace.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,9 @@
struct kgsl_device;
+/*
+ * Tracepoint for a2xx irq. Includes status info
+ */
TRACE_EVENT(kgsl_a2xx_irq_status,
TP_PROTO(struct kgsl_device *device, unsigned int master_status,
@@ -69,6 +72,7 @@
)
);
-#endif
+#endif /* _ADRENO_A2XX_TRACE_H */
+/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index d550c62..3d9ec6d 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -22,6 +22,11 @@
#include "a3xx_reg.h"
#include "adreno_a3xx_trace.h"
+/*
+ * Set of registers to dump for A3XX on postmortem and snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
const unsigned int a3xx_registers[] = {
0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
@@ -64,6 +69,9 @@
const unsigned int a3xx_registers_count = ARRAY_SIZE(a3xx_registers) / 2;
+/* Removed the following HLSQ register ranges from being read during
+ * fault tolerance since reading the registers may cause the device to hang:
+ */
const unsigned int a3xx_hlsq_registers[] = {
0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23,
0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a,
@@ -73,26 +81,58 @@
const unsigned int a3xx_hlsq_registers_count =
ARRAY_SIZE(a3xx_hlsq_registers) / 2;
+/* The set of additional registers to be dumped for A330 */
+
+const unsigned int a330_registers[] = {
+ 0x1d0, 0x1d0, 0x1d4, 0x1d4, 0x453, 0x453,
+};
+
+const unsigned int a330_registers_count = ARRAY_SIZE(a330_registers) / 2;
+
+/* Simple macro to facilitate bit setting in the gmem2sys and sys2gmem
+ * functions.
+ */
#define _SET(_shift, _val) ((_val) << (_shift))
+/*
+ ****************************************************************************
+ *
+ * Context state shadow structure:
+ *
+ * +---------------------+------------+-------------+---------------------+---+
+ * | ALU Constant Shadow | Reg Shadow | C&V Buffers | Shader Instr Shadow |Tex|
+ * +---------------------+------------+-------------+---------------------+---+
+ *
+ * 8K - ALU Constant Shadow (8K aligned)
+ * 4K - H/W Register Shadow (8K aligned)
+ * 5K - Command and Vertex Buffers
+ * 8K - Shader Instruction Shadow
+ * ~6K - Texture Constant Shadow
+ *
+ *
+ ***************************************************************************
+ */
-#define ALU_SHADOW_SIZE (8*1024)
-#define REG_SHADOW_SIZE (4*1024)
-#define CMD_BUFFER_SIZE (5*1024)
-#define TEX_SIZE_MEM_OBJECTS 896
-#define TEX_SIZE_MIPMAP 1936
-#define TEX_SIZE_SAMPLER_OBJ 256
+/* Sizes of all sections in state shadow memory */
+#define ALU_SHADOW_SIZE (8*1024) /* 8KB */
+#define REG_SHADOW_SIZE (4*1024) /* 4KB */
+#define CMD_BUFFER_SIZE (5*1024) /* 5KB */
+#define TEX_SIZE_MEM_OBJECTS 896 /* bytes */
+#define TEX_SIZE_MIPMAP 1936 /* bytes */
+#define TEX_SIZE_SAMPLER_OBJ 256 /* bytes */
#define TEX_SHADOW_SIZE \
((TEX_SIZE_MEM_OBJECTS + TEX_SIZE_MIPMAP + \
- TEX_SIZE_SAMPLER_OBJ)*2)
-#define SHADER_SHADOW_SIZE (8*1024)
+ TEX_SIZE_SAMPLER_OBJ)*2) /* ~6KB */
+#define SHADER_SHADOW_SIZE (8*1024) /* 8KB */
+/* Total context size, excluding GMEM shadow */
#define CONTEXT_SIZE \
(ALU_SHADOW_SIZE+REG_SHADOW_SIZE + \
CMD_BUFFER_SIZE+SHADER_SHADOW_SIZE + \
TEX_SHADOW_SIZE)
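The sections add up to 8192 + 4096 + 5120 + 8192 + 6176 = 31776 bytes, matching the ~31K layout sketched above; a compile-time check along these lines (a sketch only, not part of this patch) would catch an accidental resize:

/* Sketch only: verify the context shadow layout still adds up. */
static inline void a3xx_check_context_shadow_layout(void)
{
	BUILD_BUG_ON(TEX_SHADOW_SIZE != 6176);	/* (896 + 1936 + 256) * 2 */
	BUILD_BUG_ON(CONTEXT_SIZE != 31776);	/* 8K + 4K + 5K + 8K + 6176 */
}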
+/* Offsets to different sections in context shadow memory */
#define REG_OFFSET ALU_SHADOW_SIZE
#define CMD_OFFSET (REG_OFFSET+REG_SHADOW_SIZE)
#define SHADER_OFFSET (CMD_OFFSET+CMD_BUFFER_SIZE)
@@ -105,12 +145,14 @@
#define FS_TEX_OFFSET_MIPMAP (FS_TEX_OFFSET_MEM_OBJECTS+TEX_SIZE_MEM_OBJECTS)
#define FS_TEX_OFFSET_SAMPLER_OBJ (FS_TEX_OFFSET_MIPMAP+TEX_SIZE_MIPMAP)
+/* The offset for fragment shader data in HLSQ context */
#define SSIZE (16*1024)
#define HLSQ_SAMPLER_OFFSET 0x000
#define HLSQ_MEMOBJ_OFFSET 0x400
#define HLSQ_MIPMAP_OFFSET 0x800
+/* Use shadow RAM */
#define HLSQ_SHADOW_BASE (0x10000+SSIZE*2)
#define REG_TO_MEM_LOOP_COUNT_SHIFT 18
@@ -124,6 +166,10 @@
((vis_cull_mode) << PC_DRAW_INITIATOR_VISIBILITY_CULLING_MODE) | \
(1 << PC_DRAW_INITIATOR_PRE_DRAW_INITIATOR_ENABLE))
+/*
+ * List of context registers (starting from dword offset 0x2000).
+ * Each line contains start and end of a range of registers.
+ */
static const unsigned int context_register_ranges[] = {
A3XX_GRAS_CL_CLIP_CNTL, A3XX_GRAS_CL_CLIP_CNTL,
A3XX_GRAS_CL_GB_CLIP_ADJ, A3XX_GRAS_CL_GB_CLIP_ADJ,
@@ -158,6 +204,7 @@
A3XX_VPC_ATTR, A3XX_VPC_VARY_CYLWRAP_ENABLE_1,
};
+/* Global registers that need to be saved separately */
static const unsigned int global_registers[] = {
A3XX_GRAS_CL_USER_PLANE_X0, A3XX_GRAS_CL_USER_PLANE_Y0,
A3XX_GRAS_CL_USER_PLANE_Z0, A3XX_GRAS_CL_USER_PLANE_W0,
@@ -189,31 +236,35 @@
#define GLOBAL_REGISTER_COUNT ARRAY_SIZE(global_registers)
+/* A scratchpad used to build commands during context create */
static struct tmp_ctx {
- unsigned int *cmd;
+ unsigned int *cmd; /* Next available dword in C&V buffer */
-
+ /* Addresses in command buffer where registers are saved */
uint32_t reg_values[GLOBAL_REGISTER_COUNT];
- uint32_t gmem_base;
+ uint32_t gmem_base; /* Base GPU address of GMEM */
} tmp_ctx;
#ifndef GSL_CONTEXT_SWITCH_CPU_SYNC
+/*
+ * Function for executing dest = ( (reg & and) ROL rol ) | or
+ */
static unsigned int *rmw_regtomem(unsigned int *cmd,
unsigned int reg, unsigned int and,
unsigned int rol, unsigned int or,
unsigned int dest)
{
-
+ /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | reg */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
- *cmd++ = 0x00000000;
- *cmd++ = reg;
+ *cmd++ = 0x00000000; /* AND value */
+ *cmd++ = reg; /* OR address */
-
+ /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & and) ROL rol ) | or */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = (rol << 24) | A3XX_CP_SCRATCH_REG2;
- *cmd++ = and;
- *cmd++ = or;
+ *cmd++ = and; /* AND value */
+ *cmd++ = or; /* OR value */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_CP_SCRATCH_REG2;
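In plain C, the value the three packets above leave at dest is the following (a sketch; this helper is not in the driver):

/* CPU-side equivalent of the CP_REG_RMW sequence built above. */
static unsigned int rmw_value(unsigned int reg_val, unsigned int and,
			      unsigned int rol, unsigned int or)
{
	unsigned int val = reg_val & and;

	if (rol)	/* 32-bit rotate left, like the CP_REG_RMW ROL field */
		val = (val << rol) | (val >> (32 - rol));
	return val | or;	/* CP_REG_TO_MEM then writes this to dest */
}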
@@ -241,15 +292,23 @@
*cmd++ = 0;
#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
+ /*
+ * Context registers are already shadowed; just need to
+ * disable shadowing to prevent corruption.
+ */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
- *cmd++ = 4 << 16;
- *cmd++ = 0x0;
+ *cmd++ = 4 << 16; /* regs, start=0 */
+ *cmd++ = 0x0; /* count = 0 */
#else
+ /*
+ * Make sure the HW context has the correct register values before
+ * reading them.
+ */
-
+ /* Write context registers into shadow */
for (i = 0; i < ARRAY_SIZE(context_register_ranges) / 2; i++) {
unsigned int start = context_register_ranges[i * 2];
unsigned int end = context_register_ranges[i * 2 + 1];
@@ -261,42 +320,75 @@
}
#endif
-
+ /* Need to handle some of the global registers separately */
for (i = 0; i < ARRAY_SIZE(global_registers); i++) {
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = global_registers[i];
*cmd++ = tmp_ctx.reg_values[i];
}
-
+ /* Save vertex shader constants */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
*cmd++ = 0x0000FFFF;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
drawctxt->constant_save_commands[1].hostptr = cmd;
drawctxt->constant_save_commands[1].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
-
+ /*
+ From fixup:
+
+ dwords = SP_VS_CTRL_REG1.VSCONSTLENGTH / 4
+ src = (HLSQ_SHADOW_BASE + 0x2000) / 4
+
+ From register spec:
+ SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
+ */
+ *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
+ /* ALU constant shadow base */
*cmd++ = drawctxt->gpustate.gpuaddr & 0xfffffffc;
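The header dword left as 0 above is patched by the save-fixup IB; how it is packed, as a sketch (the helper is illustrative and not in the driver):

/* Sketch: CP_REG_TO_MEM header = loop count (dwords) | source (dword addr) */
static inline unsigned int pack_reg_to_mem_header(unsigned int loop_dwords,
						  unsigned int src_gpu_dwords)
{
	return (loop_dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src_gpu_dwords;
}
/* e.g. for the VS constants the source is (HLSQ_SHADOW_BASE + 0x2000) / 4 */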
-
+ /* Save fragment shader constants */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
*cmd++ = 0x0000FFFF;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
drawctxt->constant_save_commands[2].hostptr = cmd;
drawctxt->constant_save_commands[2].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
- *cmd++ = 0;
+ dwords = SP_FS_CTRL_REG1.FSCONSTLENGTH / 4
+ src = (HLSQ_SHADOW_BASE + 0x2000 + SSIZE) / 4
-
+ From register spec:
+ SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
+ */
+ *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
+
+ /*
+ From fixup:
+
+ base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
+ offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
+
+ From register spec:
+ SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
+ start offset in on chip RAM,
+ 128bit aligned
+
+ dst = base + offset
+ Because of the base alignment we can use
+ dst = base | offset
+ */
+ *cmd++ = 0; /* dst */
+
+ /* Save VS texture memory objects */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ =
((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
@@ -305,7 +397,7 @@
(drawctxt->gpustate.gpuaddr +
VS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
-
+ /* Save VS texture mipmap pointers */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ =
((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
@@ -313,7 +405,7 @@
*cmd++ =
(drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
-
+ /* Save VS texture sampler objects */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = ((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
((HLSQ_SHADOW_BASE + HLSQ_SAMPLER_OFFSET) / 4);
@@ -321,7 +413,7 @@
(drawctxt->gpustate.gpuaddr +
VS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
-
+ /* Save FS texture memory objects */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ =
((TEX_SIZE_MEM_OBJECTS / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
@@ -330,7 +422,7 @@
(drawctxt->gpustate.gpuaddr +
FS_TEX_OFFSET_MEM_OBJECTS) & 0xfffffffc;
-
+ /* Save FS texture mipmap pointers */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ =
((TEX_SIZE_MIPMAP / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
@@ -338,7 +430,7 @@
*cmd++ =
(drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP) & 0xfffffffc;
-
+ /* Save FS texture sampler objects */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ =
((TEX_SIZE_SAMPLER_OBJ / 4) << REG_TO_MEM_LOOP_COUNT_SHIFT) |
@@ -347,12 +439,13 @@
(drawctxt->gpustate.gpuaddr +
FS_TEX_OFFSET_SAMPLER_OBJ) & 0xfffffffc;
-
+ /* Create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->regconstant_save, start, cmd);
tmp_ctx.cmd = cmd;
}
+/* Copy GMEM contents to system memory shadow. */
static unsigned int *build_gmem2sys_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
struct gmem_shadow_t *shadow)
@@ -366,28 +459,28 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
-
+ /* RB_MODE_CONTROL */
*cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RESOLVE_PASS) |
_SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1) |
_SET(RB_MODECONTROL_PACKER_TIMER_ENABLE, 1);
-
+ /* RB_RENDER_CONTROL */
*cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
_SET(RB_RENDERCONTROL_DISABLE_COLOR_PIPE, 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_RB_COPY_CONTROL);
-
+ /* RB_COPY_CONTROL */
*cmds++ = _SET(RB_COPYCONTROL_RESOLVE_CLEAR_MODE,
RB_CLEAR_MODE_RESOLVE) |
_SET(RB_COPYCONTROL_COPY_GMEM_BASE,
tmp_ctx.gmem_base >> 14);
-
+ /* RB_COPY_DEST_BASE */
*cmds++ = _SET(RB_COPYDESTBASE_COPY_DEST_BASE,
shadow->gmemshadow.gpuaddr >> 5);
-
+ /* RB_COPY_DEST_PITCH */
*cmds++ = _SET(RB_COPYDESTPITCH_COPY_DEST_PITCH,
(shadow->pitch * 4) / 32);
-
+ /* RB_COPY_DEST_INFO */
*cmds++ = _SET(RB_COPYDESTINFO_COPY_DEST_TILE,
RB_TILINGMODE_LINEAR) |
_SET(RB_COPYDESTINFO_COPY_DEST_FORMAT, RB_R8G8B8A8_UNORM) |
@@ -396,34 +489,34 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
-
+ /* GRAS_SC_CONTROL */
*cmds++ = _SET(GRAS_SC_CONTROL_RENDER_MODE, 2);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
-
+ /* VFD_CONTROL_0 */
*cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 4) |
_SET(VFD_CTRLREG0_PACKETSIZE, 2) |
_SET(VFD_CTRLREG0_STRMDECINSTRCNT, 1) |
_SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 1);
-
+ /* VFD_CONTROL_1 */
*cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 1) |
_SET(VFD_CTRLREG1_REGID4VTX, 252) |
_SET(VFD_CTRLREG1_REGID4INST, 252);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
-
+ /* VFD_FETCH_INSTR_0_0 */
*cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
_SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
_SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
-
+ /* VFD_FETCH_INSTR_1_0 */
*cmds++ = _SET(VFD_BASEADDR_BASEADDR,
shadow->quad_vertices.gpuaddr);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
-
+ /* VFD_DECODE_INSTR_0 */
*cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
_SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
_SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
@@ -432,47 +525,47 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
-
+ /* HLSQ_CONTROL_0_REG */
*cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
_SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
_SET(HLSQ_CTRL0REG_RESERVED2, 1) |
_SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
-
+ /* HLSQ_CONTROL_1_REG */
*cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
_SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
-
+ /* HLSQ_CONTROL_2_REG */
*cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
-
+ /* HLSQ_CONTROL_3_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
-
+ /* HLSQ_VS_CONTROL_REG */
*cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
-
+ /* HLSQ_FS_CONTROL_REG */
*cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
_SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
_SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 1);
-
+ /* HLSQ_CONST_VSPRESV_RANGE_REG */
*cmds++ = 0x00000000;
-
+ /* HLSQ_CONST_FSPRESV_RANGE_REG */
*cmds++ = _SET(HLSQ_CONSTFSPRESERVEDRANGEREG_STARTENTRY, 32) |
_SET(HLSQ_CONSTFSPRESERVEDRANGEREG_ENDENTRY, 32);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
-
+ /* SP_FS_LENGTH_REG */
*cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
-
+ /* SP_SP_CTRL_REG */
*cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
_SET(SP_SPCTRLREG_LOMODE, 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
*cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
-
+ /* SP_VS_CTRL_REG0 */
*cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
_SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
_SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
@@ -480,47 +573,47 @@
_SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
_SET(SP_VSCTRLREG0_VSSUPERTHREADMODE, 1) |
_SET(SP_VSCTRLREG0_VSLENGTH, 1);
-
+ /* SP_VS_CTRL_REG1 */
*cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 4);
-
+ /* SP_VS_PARAM_REG */
*cmds++ = _SET(SP_VSPARAMREG_PSIZEREGID, 252);
-
+ /* SP_VS_OUT_REG_0 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_1 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_2 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_3 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_4 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_5 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_6 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG_7 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
*cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
-
+ /* SP_VS_VPC_DST_REG_0 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_VPC_DST_REG_1 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_VPC_DST_REG_2 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_VPC_DST_REG_3 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OBJ_OFFSET_REG */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OBJ_START_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
*cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
-
+ /* SP_VS_LENGTH_REG */
*cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
-
+ /* SP_FS_CTRL_REG0 */
*cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
_SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
_SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
@@ -529,61 +622,61 @@
_SET(SP_FSCTRLREG0_FSTHREADSIZE, SP_FOUR_PIX_QUADS) |
_SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
_SET(SP_FSCTRLREG0_FSLENGTH, 1);
-
+ /* SP_FS_CTRL_REG1 */
*cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
_SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
-
+ /* SP_FS_OBJ_OFFSET_REG */
*cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
_SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 127);
-
+ /* SP_FS_OBJ_START_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
-
+ /* SP_FS_FLAT_SHAD_MODE_REG_0 */
*cmds++ = 0x00000000;
-
+ /* SP_FS_FLAT_SHAD_MODE_REG_1 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
-
+ /* SP_FS_OUTPUT_REG */
*cmds++ = _SET(SP_IMAGEOUTPUTREG_DEPTHOUTMODE, SP_PIXEL_BASED);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
-
+ /* SP_FS_MRT_REG_0 */
*cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
-
+ /* SP_FS_MRT_REG_1 */
*cmds++ = 0x00000000;
-
+ /* SP_FS_MRT_REG_2 */
*cmds++ = 0x00000000;
-
+ /* SP_FS_MRT_REG_3 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
*cmds++ = CP_REG(A3XX_VPC_ATTR);
-
+ /* VPC_ATTR */
*cmds++ = _SET(VPC_VPCATTR_THRHDASSIGN, 1) |
_SET(VPC_VPCATTR_LMSIZE, 1);
-
+ /* VPC_PACK */
*cmds++ = 0x00000000;
-
+ /* VPC_VARRYING_INTERUPT_MODE_0 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARRYING_INTERUPT_MODE_1 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARRYING_INTERUPT_MODE_2 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARRYING_INTERUPT_MODE_3 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_PS_REPL_MODE_0 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_PS_REPL_MODE_1 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_PS_REPL_MODE_2 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_PS_REPL_MODE_3 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
@@ -594,13 +687,13 @@
*cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
| (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
-
+ /* (sy)(rpt3)mov.f32f32 r0.y, (r)r1.y; */
*cmds++ = 0x00000000; *cmds++ = 0x13001000;
-
+ /* end; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
@@ -618,13 +711,13 @@
*cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
| (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
-
+ /* (sy)(rpt3)mov.f32f32 r0.y, (r)c0.x; */
*cmds++ = 0x00000000; *cmds++ = 0x30201b00;
-
+ /* end; */
*cmds++ = 0x00000000; *cmds++ = 0x03000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
@@ -638,18 +731,18 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
-
+ /* RB_MSAA_CONTROL */
*cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
-
+ /* RB_DEPTH_CONTROL */
*cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_NEVER);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
-
+ /* RB_STENCIL_CONTROL */
*cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_NEVER) |
_SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
_SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
@@ -661,12 +754,12 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
-
+ /* GRAS_SU_MODE_CONTROL */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
-
+ /* RB_MRT_CONTROL0 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_ROP_CODE, 12) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
@@ -674,49 +767,49 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
-
+ /* RB_MRT_BLEND_CONTROL0 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL1 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
_SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
-
+ /* RB_MRT_BLEND_CONTROL1 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL2 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
_SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
-
+ /* RB_MRT_BLEND_CONTROL2 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_ALPHA_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL3 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
_SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
-
+ /* RB_MRT_BLEND_CONTROL3 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
@@ -726,36 +819,36 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
-
+ /* VFD_INDEX_MIN */
*cmds++ = 0x00000000;
-
+ /* VFD_INDEX_MAX */
*cmds++ = 0x155;
-
+ /* VFD_INSTANCEID_OFFSET */
*cmds++ = 0x00000000;
-
+ /* VFD_INDEX_OFFSET */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
-
+ /* VFD_VS_THREADING_THRESHOLD */
*cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
_SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
-
+ /* TPL1_TP_VS_TEX_OFFSET */
*cmds++ = 0;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
-
+ /* TPL1_TP_FS_TEX_OFFSET */
*cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
_SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
_SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
-
+ /* PC_PRIM_VTX_CNTL */
*cmds++ = _SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
PC_DRAW_TRIANGLES) |
_SET(PC_PRIM_VTX_CONTROL_POLYMODE_BACK_PTYPE,
@@ -764,41 +857,41 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
-
+ /* GRAS_SC_WINDOW_SCISSOR_TL */
*cmds++ = 0x00000000;
-
+ /* GRAS_SC_WINDOW_SCISSOR_BR */
*cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
_SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
-
+ /* GRAS_SC_SCREEN_SCISSOR_TL */
*cmds++ = 0x00000000;
-
+ /* GRAS_SC_SCREEN_SCISSOR_BR */
*cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
_SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
-
+ /* GRAS_CL_VPORT_XOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_XSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3f800000);
-
+ /* GRAS_CL_VPORT_YOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_YSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3f800000);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
-
+ /* GRAS_CL_VPORT_ZOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_ZSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3f800000);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
-
+ /* GRAS_CL_CLIP_CNTL */
*cmds++ = _SET(GRAS_CL_CLIP_CNTL_CLIP_DISABLE, 1) |
_SET(GRAS_CL_CLIP_CNTL_ZFAR_CLIP_DISABLE, 1) |
_SET(GRAS_CL_CLIP_CNTL_VP_CLIP_CODE_IGNORE, 1) |
@@ -807,14 +900,14 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_CL_GB_CLIP_ADJ);
-
+ /* GRAS_CL_GB_CLIP_ADJ */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
-
+ /* oxili_generate_context_roll_packets */
*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
*cmds++ = 0x00000400;
@@ -822,12 +915,12 @@
*cmds++ = 0x00000400;
*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00008000;
+ *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00008000;
+ *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
-
+ /* Clear cache invalidate bit when re-loading the shader control regs */
*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
*cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
_SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
@@ -846,38 +939,43 @@
_SET(SP_FSCTRLREG0_FSLENGTH, 1);
*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
-
+ /* end oxili_generate_context_roll_packets */
+ /*
+ * Resolve using two draw calls with a dummy register
+ * write in between. This is a HLM workaround
+ * that should be removed later.
+ */
*cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* Viz query info */
*cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
PC_DI_SRC_SEL_IMMEDIATE,
PC_DI_INDEX_SIZE_32_BIT,
PC_DI_IGNORE_VISIBILITY);
- *cmds++ = 0x00000003;
- *cmds++ = 0x00000000;
- *cmds++ = 0x00000001;
- *cmds++ = 0x00000002;
+ *cmds++ = 0x00000003; /* Num indices */
+ *cmds++ = 0x00000000; /* Index 0 */
+ *cmds++ = 0x00000001; /* Index 1 */
+ *cmds++ = 0x00000002; /* Index 2 */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_DRAW_INDX_2, 6);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* Viz query info */
*cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_TRILIST,
PC_DI_SRC_SEL_IMMEDIATE,
PC_DI_INDEX_SIZE_32_BIT,
PC_DI_IGNORE_VISIBILITY);
- *cmds++ = 0x00000003;
- *cmds++ = 0x00000002;
- *cmds++ = 0x00000001;
- *cmds++ = 0x00000003;
+ *cmds++ = 0x00000003; /* Num indices */
+ *cmds++ = 0x00000002; /* Index 0 */
+ *cmds++ = 0x00000001; /* Index 1 */
+ *cmds++ = 0x00000003; /* Index 2 */
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_HLSQ_CL_CONTROL_0_REG);
@@ -886,7 +984,7 @@
*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmds++ = 0x00000000;
-
+ /* Create indirect buffer command for above command sequence */
create_ib1(drawctxt, shadow->gmem_save, start, cmds);
return cmds;
@@ -897,7 +995,7 @@
unsigned int *cmd = tmp_ctx.cmd;
unsigned int *start;
-
+ /* Reserve space for boolean values used for COND_EXEC packet */
drawctxt->cond_execs[0].hostptr = cmd;
drawctxt->cond_execs[0].gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
*cmd++ = 0;
@@ -916,42 +1014,78 @@
start = cmd;
-
+ /* Save vertex shader */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
*cmd++ = 0x0000FFFF;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
drawctxt->shader_save_commands[2].hostptr = cmd;
drawctxt->shader_save_commands[2].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
+
+ dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8
+
+ From regspec:
+ SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
+ If bit31 is 1, it means overflow
+ or any long shader.
+
+ src = (HLSQ_SHADOW_BASE + 0x1000)/4
+ */
+ *cmd++ = 0; /* (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
-
+ /* Save fragment shader */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
*cmd++ = 0x0000FFFF;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
drawctxt->shader_save_commands[3].hostptr = cmd;
drawctxt->shader_save_commands[3].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
+
+ dwords = SP_FS_CTRL_REG0.FS_LENGTH * 8
+
+ From regspec:
+ SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
+ If bit31 is 1, it means overflow
+ or any long shader.
+
+ fs_offset = SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC * 32
+ From regspec:
+
+ SP_FS_OBJ_OFFSET_REG.SHADEROBJOFFSETINIC [31:25]:
+ First instruction of the whole shader will be stored from
+ the offset in instruction cache, unit = 256bits, a cache line.
+ It can start from 0 if no VS available.
+
+ src = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE + fs_offset)/4
+ */
+ *cmd++ = 0; /*(dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src */
*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
-
+ /* Create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->shader_save, start, cmd);
tmp_ctx.cmd = cmd;
}
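The two CP_COND_EXEC preambles above gate the following REG_TO_MEM copies on the boolean shadows reserved earlier, so a shader is only saved when its cond_execs slot is non-zero. A hedged sketch of the emit pattern used twice in this function (the helper name is hypothetical; the payload mirrors the dwords above):

static unsigned int *emit_cond_exec(unsigned int *cmd, unsigned int bool_gpuaddr,
                                    unsigned int mask, unsigned int exec_count)
{
        *cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
        *cmd++ = bool_gpuaddr >> 2;     /* dword address of the boolean */
        *cmd++ = bool_gpuaddr >> 2;
        *cmd++ = mask;                  /* e.g. 0x0000FFFF above */
        *cmd++ = exec_count;            /* EXEC_COUNT: dwords that follow and run only if true */
        return cmd;
}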
+/*
+ * Make an IB to modify context save IBs with the correct shader instruction
+ * and constant sizes and offsets.
+ */
static void build_save_fixup_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
@@ -959,26 +1093,26 @@
unsigned int *cmd = tmp_ctx.cmd;
unsigned int *start = cmd;
-
+ /* Flush HLSQ lazy updates */
*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
- *cmd++ = 0x7;
+ *cmd++ = 0x7; /* HLSQ_FLUSH */
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
- *cmd++ = 0x00000000;
+ *cmd++ = 0x00000000; /* No start addr for full invalidate */
*cmd++ = (unsigned int)
UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
- 0;
+ 0; /* No end addr for full invalidate */
-
+ /* Make sure registers are flushed */
*cmd++ = cp_type3_packet(CP_CONTEXT_UPDATE, 1);
*cmd++ = 0;
#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
-
+ /* Save shader sizes */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_VS_CTRL_REG0;
*cmd++ = drawctxt->shader_save_commands[2].gpuaddr;
@@ -987,12 +1121,12 @@
*cmd++ = A3XX_SP_FS_CTRL_REG0;
*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
-
+ /* Save shader offsets */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
*cmd++ = drawctxt->shader_save_commands[1].gpuaddr;
-
+ /* Save constant sizes */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_VS_CTRL_REG1;
*cmd++ = drawctxt->constant_save_commands[1].gpuaddr;
@@ -1000,59 +1134,72 @@
*cmd++ = A3XX_SP_FS_CTRL_REG1;
*cmd++ = drawctxt->constant_save_commands[2].gpuaddr;
-
+ /* Save FS constant offset */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
*cmd++ = drawctxt->constant_save_commands[0].gpuaddr;
-
+ /* Save VS instruction store mode */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_VS_CTRL_REG0;
*cmd++ = drawctxt->cond_execs[0].gpuaddr;
-
+ /* Save FS instruction store mode */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_FS_CTRL_REG0;
*cmd++ = drawctxt->cond_execs[1].gpuaddr;
#else
-
+ /* Shader save */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
11+REG_TO_MEM_LOOP_COUNT_SHIFT,
(HLSQ_SHADOW_BASE + 0x1000) / 4,
drawctxt->shader_save_commands[2].gpuaddr);
-
+ /* CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0x00000000) | SP_FS_CTRL_REG0 */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
- *cmd++ = 0x00000000;
- *cmd++ = A3XX_SP_FS_CTRL_REG0;
+ *cmd++ = 0x00000000; /* AND value */
+ *cmd++ = A3XX_SP_FS_CTRL_REG0; /* OR address */
+ /* CP_SCRATCH_REG2 = ( (CP_SCRATCH_REG2 & 0x7f000000) >> 21 )
+ | ((HLSQ_SHADOW_BASE+0x1000+SSIZE)/4) */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = ((11 + REG_TO_MEM_LOOP_COUNT_SHIFT) << 24) |
A3XX_CP_SCRATCH_REG2;
- *cmd++ = 0x7f000000;
- *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4;
+ *cmd++ = 0x7f000000; /* AND value */
+ *cmd++ = (HLSQ_SHADOW_BASE + 0x1000 + SSIZE) / 4; /* OR value */
+ /*
+ * CP_SCRATCH_REG3 = (CP_SCRATCH_REG3 & 0x00000000) |
+ * SP_FS_OBJ_OFFSET_REG
+ */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG3;
- *cmd++ = 0x00000000;
- *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
+ *cmd++ = 0x00000000; /* AND value */
+ *cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG; /* OR address */
+ /*
+ * CP_SCRATCH_REG3 = ( (CP_SCRATCH_REG3 & 0xfe000000) >> 25 ) |
+ * 0x00000000
+ */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = A3XX_CP_SCRATCH_REG3;
- *cmd++ = 0xfe000000;
- *cmd++ = 0x00000000;
+ *cmd++ = 0xfe000000; /* AND value */
+ *cmd++ = 0x00000000; /* OR value */
+ /*
+ * CP_SCRATCH_REG2 = (CP_SCRATCH_REG2 & 0xffffffff) | CP_SCRATCH_REG3
+ */
*cmd++ = cp_type3_packet(CP_REG_RMW, 3);
*cmd++ = (1 << 30) | A3XX_CP_SCRATCH_REG2;
- *cmd++ = 0xffffffff;
- *cmd++ = A3XX_CP_SCRATCH_REG3;
+ *cmd++ = 0xffffffff; /* AND value */
+ *cmd++ = A3XX_CP_SCRATCH_REG3; /* OR address */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_CP_SCRATCH_REG2;
*cmd++ = drawctxt->shader_save_commands[3].gpuaddr;
-
+ /* Constant save */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
2 + REG_TO_MEM_LOOP_COUNT_SHIFT,
(HLSQ_SHADOW_BASE + 0x2000) / 4,
@@ -1068,19 +1215,19 @@
drawctxt->constant_save_commands[2].gpuaddr
+ sizeof(unsigned int));
-
+ /* Modify constant save conditionals */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
0, 0, drawctxt->cond_execs[2].gpuaddr);
cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
0, 0, drawctxt->cond_execs[3].gpuaddr);
-
+ /* Save VS instruction store mode */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x00000002,
31, 0, drawctxt->cond_execs[0].gpuaddr);
-
+ /* Save FS instruction store mode */
cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG0, 0x00000002,
31, 0, drawctxt->cond_execs[1].gpuaddr);
@@ -1091,6 +1238,9 @@
tmp_ctx.cmd = cmd;
}
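Putting the "From fixup" comments together: the word patched into shader_save_commands[2] is just a dword count packed next to an HLSQ shadow source address. A CPU-side restatement (sketch only; the real fixup computes this on the GPU via CP_REG_RMW, and the helper below is hypothetical):

/*
 * ord = (dwords << REG_TO_MEM_LOOP_COUNT_SHIFT) | src, where
 * dwords = SP_VS_CTRL_REG0.VS_LENGTH * 8 (VS_LENGTH is in 256-bit units)
 * and src = (HLSQ_SHADOW_BASE + 0x1000) / 4.
 */
static unsigned int vs_save_reg_to_mem_ord(unsigned int sp_vs_ctrl_reg0,
                                           unsigned int loop_count_shift,
                                           unsigned int hlsq_shadow_base)
{
        /* 0x7f mirrors the 0x7f000000 AND mask used in the fixup (bit31 = overflow) */
        unsigned int vs_length = (sp_vs_ctrl_reg0 >> 24) & 0x7f;
        unsigned int dwords = vs_length * 8;

        return (dwords << loop_count_shift) | ((hlsq_shadow_base + 0x1000) / 4);
}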
+/****************************************************************************/
+/* Functions to build context restore IBs */
+/****************************************************************************/
static unsigned int *build_sys2gmem_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
@@ -1104,31 +1254,31 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
-
+ /* HLSQ_CONTROL_0_REG */
*cmds++ = _SET(HLSQ_CTRL0REG_FSTHREADSIZE, HLSQ_FOUR_PIX_QUADS) |
_SET(HLSQ_CTRL0REG_FSSUPERTHREADENABLE, 1) |
_SET(HLSQ_CTRL0REG_SPSHADERRESTART, 1) |
_SET(HLSQ_CTRL0REG_CHUNKDISABLE, 1) |
_SET(HLSQ_CTRL0REG_SPCONSTFULLUPDATE, 1);
-
+ /* HLSQ_CONTROL_1_REG */
*cmds++ = _SET(HLSQ_CTRL1REG_VSTHREADSIZE, HLSQ_TWO_VTX_QUADS) |
_SET(HLSQ_CTRL1REG_VSSUPERTHREADENABLE, 1);
-
+ /* HLSQ_CONTROL_2_REG */
*cmds++ = _SET(HLSQ_CTRL2REG_PRIMALLOCTHRESHOLD, 31);
-
+ /* HLSQ_CONTROL3_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BUF_INFO0);
-
+ /* RB_MRT_BUF_INFO0 */
*cmds++ = _SET(RB_MRTBUFINFO_COLOR_FORMAT, RB_R8G8B8A8_UNORM) |
_SET(RB_MRTBUFINFO_COLOR_TILE_MODE, RB_TILINGMODE_32X32) |
_SET(RB_MRTBUFINFO_COLOR_BUF_PITCH,
(shadow->gmem_pitch * 4 * 8) / 256);
-
+ /* RB_MRT_BUF_BASE0 */
*cmds++ = _SET(RB_MRTBUFBASE_COLOR_BUF_BASE, tmp_ctx.gmem_base >> 5);
-
+ /* Texture samplers */
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
*cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
| (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
@@ -1142,7 +1292,7 @@
*cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
*cmds++ = 0x00000000;
-
+ /* Texture memobjs */
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 6);
*cmds++ = (16 << CP_LOADSTATE_DSTOFFSET_SHIFT)
| (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
@@ -1158,7 +1308,7 @@
*cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
*cmds++ = 0x00000000;
-
+ /* Mipmap bases */
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 16);
*cmds++ = (224 << CP_LOADSTATE_DSTOFFSET_SHIFT)
| (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
@@ -1186,74 +1336,74 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_HLSQ_VS_CONTROL_REG);
-
+ /* HLSQ_VS_CONTROL_REG */
*cmds++ = _SET(HLSQ_VSCTRLREG_VSINSTRLENGTH, 1);
-
+ /* HLSQ_FS_CONTROL_REG */
*cmds++ = _SET(HLSQ_FSCTRLREG_FSCONSTLENGTH, 1) |
_SET(HLSQ_FSCTRLREG_FSCONSTSTARTOFFSET, 128) |
_SET(HLSQ_FSCTRLREG_FSINSTRLENGTH, 2);
-
+ /* HLSQ_CONST_VSPRESV_RANGE_REG */
*cmds++ = 0x00000000;
-
+ /* HLSQ_CONST_FSPRESV_RANGE_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_FS_LENGTH_REG);
-
+ /* SP_FS_LENGTH_REG */
*cmds++ = _SET(SP_SHADERLENGTH_LEN, 2);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 12);
*cmds++ = CP_REG(A3XX_SP_VS_CTRL_REG0);
-
+ /* SP_VS_CTRL_REG0 */
*cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
_SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
_SET(SP_VSCTRLREG0_VSICACHEINVALID, 1) |
_SET(SP_VSCTRLREG0_VSFULLREGFOOTPRINT, 2) |
_SET(SP_VSCTRLREG0_VSTHREADSIZE, SP_TWO_VTX_QUADS) |
_SET(SP_VSCTRLREG0_VSLENGTH, 1);
-
+ /* SP_VS_CTRL_REG1 */
*cmds++ = _SET(SP_VSCTRLREG1_VSINITIALOUTSTANDING, 8);
-
+ /* SP_VS_PARAM_REG */
*cmds++ = _SET(SP_VSPARAMREG_POSREGID, 4) |
_SET(SP_VSPARAMREG_PSIZEREGID, 252) |
_SET(SP_VSPARAMREG_TOTALVSOUTVAR, 1);
-
+ /* SP_VS_OUT_REG0 */
*cmds++ = _SET(SP_VSOUTREG_COMPMASK0, 3);
-
+ /* SP_VS_OUT_REG1 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG2 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG3 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG4 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG5 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG6 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OUT_REG7 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 7);
*cmds++ = CP_REG(A3XX_SP_VS_VPC_DST_REG_0);
-
+ /* SP_VS_VPC_DST_REG0 */
*cmds++ = _SET(SP_VSVPCDSTREG_OUTLOC0, 8);
-
+ /* SP_VS_VPC_DST_REG1 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_VPC_DST_REG2 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_VPC_DST_REG3 */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OBJ_OFFSET_REG */
*cmds++ = 0x00000000;
-
+ /* SP_VS_OBJ_START_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 6);
*cmds++ = CP_REG(A3XX_SP_VS_LENGTH_REG);
-
+ /* SP_VS_LENGTH_REG */
*cmds++ = _SET(SP_SHADERLENGTH_LEN, 1);
-
+ /* SP_FS_CTRL_REG0 */
*cmds++ = _SET(SP_FSCTRLREG0_FSTHREADMODE, SP_MULTI) |
_SET(SP_FSCTRLREG0_FSINSTRBUFFERMODE, SP_BUFFER_MODE) |
_SET(SP_FSCTRLREG0_FSICACHEINVALID, 1) |
@@ -1264,57 +1414,57 @@
_SET(SP_FSCTRLREG0_FSSUPERTHREADMODE, 1) |
_SET(SP_FSCTRLREG0_PIXLODENABLE, 1) |
_SET(SP_FSCTRLREG0_FSLENGTH, 2);
-
+ /* SP_FS_CTRL_REG1 */
*cmds++ = _SET(SP_FSCTRLREG1_FSCONSTLENGTH, 1) |
_SET(SP_FSCTRLREG1_FSINITIALOUTSTANDING, 2) |
_SET(SP_FSCTRLREG1_HALFPRECVAROFFSET, 63);
-
+ /* SP_FS_OBJ_OFFSET_REG */
*cmds++ = _SET(SP_OBJOFFSETREG_CONSTOBJECTSTARTOFFSET, 128) |
_SET(SP_OBJOFFSETREG_SHADEROBJOFFSETINIC, 126);
-
+ /* SP_FS_OBJ_START_REG */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0);
-
+ /* SP_FS_FLAT_SHAD_MODE_REG0 */
*cmds++ = 0x00000000;
-
+ /* SP_FS_FLAT_SHAD_MODE_REG1 */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_FS_OUTPUT_REG);
-
+ /* SP_FS_OUT_REG */
*cmds++ = _SET(SP_FSOUTREG_PAD0, SP_PIXEL_BASED);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_SP_FS_MRT_REG_0);
-
+ /* SP_FS_MRT_REG0 */
*cmds++ = _SET(SP_FSMRTREG_PRECISION, 1);
-
+ /* SP_FS_MRT_REG1 */
*cmds++ = 0;
-
+ /* SP_FS_MRT_REG2 */
*cmds++ = 0;
-
+ /* SP_FS_MRT_REG3 */
*cmds++ = 0;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 11);
*cmds++ = CP_REG(A3XX_VPC_ATTR);
-
+ /* VPC_ATTR */
*cmds++ = _SET(VPC_VPCATTR_TOTALATTR, 2) |
_SET(VPC_VPCATTR_THRHDASSIGN, 1) |
_SET(VPC_VPCATTR_LMSIZE, 1);
-
+ /* VPC_PACK */
*cmds++ = _SET(VPC_VPCPACK_NUMFPNONPOSVAR, 2) |
_SET(VPC_VPCPACK_NUMNONPOSVSVAR, 2);
-
+ /* VPC_VARYING_INTERP_MODE_0 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_INTERP_MODE1 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_INTERP_MODE2 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_INTERP_MODE3 */
*cmds++ = 0x00000000;
-
+ /* VPC_VARYING_PS_REPL_MODE_0 */
*cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
@@ -1331,7 +1481,7 @@
_SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
-
+ /* VPC_VARYING_PS_REPL_MODE_1 */
*cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
@@ -1348,7 +1498,7 @@
_SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
-
+ /* VPC_VARYING_PS_REPL_MODE_2 */
*cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
@@ -1365,7 +1515,7 @@
_SET(VPC_VPCVARPSREPLMODE_COMPONENT15, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT16, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT17, 2);
-
+ /* VPC_VARYING_PS_REPL_MODE_3 */
*cmds++ = _SET(VPC_VPCVARPSREPLMODE_COMPONENT08, 1) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT09, 2) |
_SET(VPC_VPCVARPSREPLMODE_COMPONENT0A, 1) |
@@ -1385,11 +1535,11 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_SP_CTRL_REG);
-
+ /* SP_SP_CTRL_REG */
*cmds++ = _SET(SP_SPCTRLREG_SLEEPMODE, 1) |
_SET(SP_SPCTRLREG_LOMODE, 1);
-
+ /* Load vertex shader */
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 10);
*cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
| (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
@@ -1397,13 +1547,13 @@
| (1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
*cmds++ = (HLSQ_SP_VS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
| (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
-
+ /* (sy)end; */
*cmds++ = 0x00000000; *cmds++ = 0x13001000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
*cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
@@ -1413,7 +1563,7 @@
*cmds++ = 0x00000000;
-
+ /* Load fragment shader */
*cmds++ = cp_type3_packet(CP_LOAD_STATE, 18);
*cmds++ = (0 << CP_LOADSTATE_DSTOFFSET_SHIFT)
| (HLSQ_DIRECT << CP_LOADSTATE_STATESRC_SHIFT)
@@ -1421,21 +1571,21 @@
| (2 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
*cmds++ = (HLSQ_SP_FS_INSTR << CP_LOADSTATE_STATETYPE_SHIFT)
| (0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
-
+ /* (sy)(rpt1)bary.f (ei)r0.z, (r)0, r0.x; */
*cmds++ = 0x00002000; *cmds++ = 0x57309902;
-
+ /* (rpt5)nop; */
*cmds++ = 0x00000000; *cmds++ = 0x00000500;
-
+ /* sam (f32)r0.xyzw, r0.z, s#0, t#0; */
*cmds++ = 0x00000005; *cmds++ = 0xa0c01f00;
-
+ /* (sy)mov.f32f32 r1.x, r0.x; */
*cmds++ = 0x00000000; *cmds++ = 0x30040b00;
-
+ /* mov.f32f32 r1.y, r0.y; */
*cmds++ = 0x00000000; *cmds++ = 0x03000000;
-
+ /* mov.f32f32 r1.z, r0.z; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* mov.f32f32 r1.w, r0.w; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
-
+ /* end; */
*cmds++ = 0x00000000; *cmds++ = 0x00000000;
*cmds++ = cp_type0_packet(A3XX_VFD_PERFCOUNTER0_SELECT, 1);
@@ -1446,45 +1596,45 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_VFD_CONTROL_0);
-
+ /* VFD_CONTROL_0 */
*cmds++ = _SET(VFD_CTRLREG0_TOTALATTRTOVS, 8) |
_SET(VFD_CTRLREG0_PACKETSIZE, 2) |
_SET(VFD_CTRLREG0_STRMDECINSTRCNT, 2) |
_SET(VFD_CTRLREG0_STRMFETCHINSTRCNT, 2);
-
+ /* VFD_CONTROL_1 */
*cmds++ = _SET(VFD_CTRLREG1_MAXSTORAGE, 2) |
_SET(VFD_CTRLREG1_REGID4VTX, 252) |
_SET(VFD_CTRLREG1_REGID4INST, 252);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_VFD_FETCH_INSTR_0_0);
-
+ /* VFD_FETCH_INSTR_0_0 */
*cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 7) |
_SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 8) |
_SET(VFD_FETCHINSTRUCTIONS_SWITCHNEXT, 1) |
_SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
-
+ /* VFD_FETCH_INSTR_1_0 */
*cmds++ = _SET(VFD_BASEADDR_BASEADDR,
shadow->quad_vertices_restore.gpuaddr);
-
+ /* VFD_FETCH_INSTR_0_1 */
*cmds++ = _SET(VFD_FETCHINSTRUCTIONS_FETCHSIZE, 11) |
_SET(VFD_FETCHINSTRUCTIONS_BUFSTRIDE, 12) |
_SET(VFD_FETCHINSTRUCTIONS_INDEXDECODE, 1) |
_SET(VFD_FETCHINSTRUCTIONS_STEPRATE, 1);
-
+ /* VFD_FETCH_INSTR_1_1 */
*cmds++ = _SET(VFD_BASEADDR_BASEADDR,
shadow->quad_vertices_restore.gpuaddr + 16);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_VFD_DECODE_INSTR_0);
-
+ /* VFD_DECODE_INSTR_0 */
*cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
_SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
_SET(VFD_DECODEINSTRUCTIONS_FORMAT, 1) |
_SET(VFD_DECODEINSTRUCTIONS_SHIFTCNT, 8) |
_SET(VFD_DECODEINSTRUCTIONS_LASTCOMPVALID, 1) |
_SET(VFD_DECODEINSTRUCTIONS_SWITCHNEXT, 1);
-
+ /* VFD_DECODE_INSTR_1 */
*cmds++ = _SET(VFD_DECODEINSTRUCTIONS_WRITEMASK, 0x0F) |
_SET(VFD_DECODEINSTRUCTIONS_CONSTFILL, 1) |
_SET(VFD_DECODEINSTRUCTIONS_FORMAT, 2) |
@@ -1494,12 +1644,12 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_DEPTH_CONTROL);
-
+ /* RB_DEPTH_CONTROL */
*cmds++ = _SET(RB_DEPTHCONTROL_Z_TEST_FUNC, RB_FRAG_LESS);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_STENCIL_CONTROL);
-
+ /* RB_STENCIL_CONTROL */
*cmds++ = _SET(RB_STENCILCONTROL_STENCIL_FUNC, RB_REF_ALWAYS) |
_SET(RB_STENCILCONTROL_STENCIL_FAIL, RB_STENCIL_KEEP) |
_SET(RB_STENCILCONTROL_STENCIL_ZPASS, RB_STENCIL_KEEP) |
@@ -1511,32 +1661,32 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MODE_CONTROL);
-
+ /* RB_MODE_CONTROL */
*cmds++ = _SET(RB_MODECONTROL_RENDER_MODE, RB_RENDERING_PASS) |
_SET(RB_MODECONTROL_MARB_CACHE_SPLIT_MODE, 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_RENDER_CONTROL);
-
+ /* RB_RENDER_CONTROL */
*cmds++ = _SET(RB_RENDERCONTROL_BIN_WIDTH, shadow->width >> 5) |
_SET(RB_RENDERCONTROL_ALPHA_TEST_FUNC, 7);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MSAA_CONTROL);
-
+ /* RB_MSAA_CONTROL */
*cmds++ = _SET(RB_MSAACONTROL_MSAA_DISABLE, 1) |
_SET(RB_MSAACONTROL_SAMPLE_MASK, 0xFFFF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MRT_CONTROL0);
-
+ /* RB_MRT_CONTROL0 */
*cmds++ = _SET(RB_MRTCONTROL_ROP_CODE, 12) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_DISABLE) |
_SET(RB_MRTCONTROL_COMPONENT_ENABLE, 0xF);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL0);
-
+ /* RB_MRT_BLENDCONTROL0 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
@@ -1544,7 +1694,7 @@
_SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL1 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_ROP_CODE, 12) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
@@ -1552,7 +1702,7 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL1);
-
+ /* RB_MRT_BLENDCONTROL1 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
@@ -1560,7 +1710,7 @@
_SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL2 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_ROP_CODE, 12) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
@@ -1568,7 +1718,7 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL2);
-
+ /* RB_MRT_BLENDCONTROL2 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
@@ -1576,7 +1726,7 @@
_SET(RB_MRTBLENDCONTROL_ALPHA_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_ALPHA_DEST_FACTOR, RB_FACTOR_ZERO) |
_SET(RB_MRTBLENDCONTROL_CLAMP_ENABLE, 1);
-
+ /* RB_MRT_CONTROL3 */
*cmds++ = _SET(RB_MRTCONTROL_READ_DEST_ENABLE, 1) |
_SET(RB_MRTCONTROL_ROP_CODE, 12) |
_SET(RB_MRTCONTROL_DITHER_MODE, RB_DITHER_ALWAYS) |
@@ -1584,7 +1734,7 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_RB_MRT_BLEND_CONTROL3);
-
+ /* RB_MRT_BLENDCONTROL3 */
*cmds++ = _SET(RB_MRTBLENDCONTROL_RGB_SRC_FACTOR, RB_FACTOR_ONE) |
_SET(RB_MRTBLENDCONTROL_RGB_BLEND_OPCODE, RB_BLEND_OP_ADD) |
_SET(RB_MRTBLENDCONTROL_RGB_DEST_FACTOR, RB_FACTOR_ZERO) |
@@ -1595,90 +1745,92 @@
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_VFD_INDEX_MIN);
-
+ /* VFD_INDEX_MIN */
*cmds++ = 0x00000000;
-
+ /* VFD_INDEX_MAX */
*cmds++ = 340;
-
+ /* VFD_INDEX_OFFSET */
*cmds++ = 0x00000000;
-
+ /* TPL1_TP_VS_TEX_OFFSET */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_VFD_VS_THREADING_THRESHOLD);
-
+ /* VFD_VS_THREADING_THRESHOLD */
*cmds++ = _SET(VFD_THREADINGTHRESHOLD_REGID_THRESHOLD, 15) |
_SET(VFD_THREADINGTHRESHOLD_REGID_VTXCNT, 252);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_TPL1_TP_VS_TEX_OFFSET);
-
+ /* TPL1_TP_VS_TEX_OFFSET */
*cmds++ = 0x00000000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_TPL1_TP_FS_TEX_OFFSET);
-
+ /* TPL1_TP_FS_TEX_OFFSET */
*cmds++ = _SET(TPL1_TPTEXOFFSETREG_SAMPLEROFFSET, 16) |
_SET(TPL1_TPTEXOFFSETREG_MEMOBJOFFSET, 16) |
_SET(TPL1_TPTEXOFFSETREG_BASETABLEPTR, 224);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_SC_CONTROL);
-
+ /* GRAS_SC_CONTROL */
+ /*cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1);
+ *cmds++ = _SET(GRAS_SC_CONTROL_RASTER_MODE, 1) |*/
*cmds++ = 0x04001000;
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_SU_MODE_CONTROL);
-
+ /* GRAS_SU_MODE_CONTROL */
*cmds++ = _SET(GRAS_SU_CTRLMODE_LINEHALFWIDTH, 2);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_SC_WINDOW_SCISSOR_TL);
-
+ /* GRAS_SC_WINDOW_SCISSOR_TL */
*cmds++ = 0x00000000;
-
+ /* GRAS_SC_WINDOW_SCISSOR_BR */
*cmds++ = _SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_X, shadow->width - 1) |
_SET(GRAS_SC_WINDOW_SCISSOR_BR_BR_Y, shadow->height - 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_SC_SCREEN_SCISSOR_TL);
-
+ /* GRAS_SC_SCREEN_SCISSOR_TL */
*cmds++ = 0x00000000;
-
+ /* GRAS_SC_SCREEN_SCISSOR_BR */
*cmds++ = _SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_X, shadow->width - 1) |
_SET(GRAS_SC_SCREEN_SCISSOR_BR_BR_Y, shadow->height - 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 5);
*cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_XOFFSET);
-
+ /* GRAS_CL_VPORT_XOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_XSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_XSCALE_VPORT_XSCALE, 0x3F800000);
-
+ /* GRAS_CL_VPORT_YOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_YSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_YSCALE_VPORT_YSCALE, 0x3F800000);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 3);
*cmds++ = CP_REG(A3XX_GRAS_CL_VPORT_ZOFFSET);
-
+ /* GRAS_CL_VPORT_ZOFFSET */
*cmds++ = 0x00000000;
-
+ /* GRAS_CL_VPORT_ZSCALE */
*cmds++ = _SET(GRAS_CL_VPORT_ZSCALE_VPORT_ZSCALE, 0x3F800000);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_GRAS_CL_CLIP_CNTL);
-
+ /* GRAS_CL_CLIP_CNTL */
*cmds++ = _SET(GRAS_CL_CLIP_CNTL_IJ_PERSP_CENTER, 1);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_SP_FS_IMAGE_OUTPUT_REG_0);
-
+ /* SP_FS_IMAGE_OUTPUT_REG_0 */
*cmds++ = _SET(SP_IMAGEOUTPUTREG_MRTFORMAT, SP_R8G8B8A8_UNORM);
*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmds++ = CP_REG(A3XX_PC_PRIM_VTX_CNTL);
-
+ /* PC_PRIM_VTX_CONTROL */
*cmds++ = _SET(PC_PRIM_VTX_CONTROL_STRIDE_IN_VPC, 2) |
_SET(PC_PRIM_VTX_CONTROL_POLYMODE_FRONT_PTYPE,
PC_DRAW_TRIANGLES) |
@@ -1687,7 +1839,7 @@
_SET(PC_PRIM_VTX_CONTROL_PROVOKING_VTX_LAST, 1);
-
+ /* oxili_generate_context_roll_packets */
*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
*cmds++ = 0x00000400;
@@ -1695,12 +1847,12 @@
*cmds++ = 0x00000400;
*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00008000;
+ *cmds++ = 0x00008000; /* SP_VS_MEM_SIZE_REG */
*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00008000;
+ *cmds++ = 0x00008000; /* SP_FS_MEM_SIZE_REG */
-
+ /* Clear cache invalidate bit when re-loading the shader control regs */
*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
*cmds++ = _SET(SP_VSCTRLREG0_VSTHREADMODE, SP_MULTI) |
_SET(SP_VSCTRLREG0_VSINSTRBUFFERMODE, SP_BUFFER_MODE) |
@@ -1719,22 +1871,22 @@
_SET(SP_FSCTRLREG0_FSLENGTH, 2);
*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* SP_VS_MEM_SIZE_REG */
*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* SP_FS_MEM_SIZE_REG */
-
+ /* end oxili_generate_context_roll_packets */
*cmds++ = cp_type3_packet(CP_DRAW_INDX, 3);
- *cmds++ = 0x00000000;
+ *cmds++ = 0x00000000; /* Viz query info */
*cmds++ = BUILD_PC_DRAW_INITIATOR(PC_DI_PT_RECTLIST,
PC_DI_SRC_SEL_AUTO_INDEX,
PC_DI_INDEX_SIZE_16_BIT,
PC_DI_IGNORE_VISIBILITY);
- *cmds++ = 0x00000002;
+ *cmds++ = 0x00000002; /* Num indices */
-
+ /* Create indirect buffer command for above command sequence */
create_ib1(drawctxt, shadow->gmem_restore, start, cmds);
return cmds;
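create_ib1() itself is outside these hunks; based on how it is called here, it presumably records a three-dword indirect-buffer descriptor for the sequence just built. A hedged sketch (the packet name and exact layout are assumptions):

static void my_create_ib1(struct adreno_context *drawctxt, unsigned int *ib,
                          unsigned int *start, unsigned int *end)
{
        ib[0] = cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2);
        ib[1] = virt2gpu(start, &drawctxt->gpustate);   /* GPU address of the commands */
        ib[2] = end - start;                            /* size in dwords */
}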
@@ -1750,26 +1902,26 @@
int i;
-
+ /* Flush HLSQ lazy updates */
*cmd++ = cp_type3_packet(CP_EVENT_WRITE, 1);
- *cmd++ = 0x7;
+ *cmd++ = 0x7; /* HLSQ_FLUSH */
*cmd++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
*cmd++ = 0;
*cmd++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
- *cmd++ = 0x00000000;
+ *cmd++ = 0x00000000; /* No start addr for full invalidate */
*cmd++ = (unsigned int)
UCHE_ENTIRE_CACHE << UCHE_INVALIDATE1REG_ALLORPORTION |
UCHE_OP_INVALIDATE << UCHE_INVALIDATE1REG_OPCODE |
- 0;
+ 0; /* No end addr for full invalidate */
lcc_start = cmd;
-
+ /* deferred cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, ???); */
cmd++;
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-
+ /* Force mismatch */
*cmd++ = ((drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000) | 1;
#else
*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
@@ -1784,7 +1936,7 @@
(cmd - lcc_start) - 1);
#ifdef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
- lcc_start[2] |= (0 << 24) | (4 << 16);
+ lcc_start[2] |= (0 << 24) | (4 << 16); /* Disable shadowing. */
#else
lcc_start[2] |= (1 << 24) | (4 << 16);
#endif
@@ -1804,7 +1956,7 @@
{
unsigned int *cmd = tmp_ctx.cmd;
unsigned int *start = cmd;
- unsigned int mode = 4;
+ unsigned int mode = 4; /* Indirect mode */
unsigned int stateblock;
unsigned int numunits;
unsigned int statetype;
@@ -1822,52 +1974,90 @@
*cmd++ = 4 << 16;
*cmd++ = 0x0;
#endif
-
+ /* HLSQ full update */
*cmd++ = cp_type3_packet(CP_SET_CONSTANT, 2);
*cmd++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
- *cmd++ = 0x68000240;
+ *cmd++ = 0x68000240; /* A3XX_HLSQ_CONTROL_0_REG */
#ifndef CONFIG_MSM_KGSL_DISABLE_SHADOW_WRITES
-
+ /* Re-enable shadowing */
*cmd++ = cp_type3_packet(CP_LOAD_CONSTANT_CONTEXT, 3);
*cmd++ = (drawctxt->gpustate.gpuaddr + REG_OFFSET) & 0xFFFFE000;
*cmd++ = (4 << 16) | (1 << 24);
*cmd++ = 0x0;
#endif
-
+ /* Load vertex shader constants */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[2].gpuaddr >> 2;
*cmd++ = 0x0000ffff;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
drawctxt->constant_load_commands[0].hostptr = cmd;
drawctxt->constant_load_commands[0].gpuaddr = virt2gpu(cmd,
&drawctxt->gpustate);
+ /*
+ From fixup:
- *cmd++ = 0;
+ mode = 4 (indirect)
+ stateblock = 4 (Vertex constants)
+ numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2; (256bit units)
+
+ From register spec:
+ SP_VS_CTRL_REG1.VSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
+
+ ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
+ */
+
+ *cmd++ = 0; /* ord1 */
*cmd++ = ((drawctxt->gpustate.gpuaddr) & 0xfffffffc) | 1;
-
+ /* Load fragment shader constants */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[3].gpuaddr >> 2;
*cmd++ = 0x0000ffff;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
drawctxt->constant_load_commands[1].hostptr = cmd;
drawctxt->constant_load_commands[1].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
+ /*
+ From fixup:
- *cmd++ = 0;
+ mode = 4 (indirect)
+ stateblock = 6 (Fragment constants)
+ numunits = SP_FS_CTRL_REG1.FSCONSTLENGTH * 2; (256bit units)
+
+ From register spec:
+ SP_FS_CTRL_REG1.FSCONSTLENGTH [09:00]: 0-512, unit = 128bits.
+
+ ord1 = (numunits<<22) | (stateblock<<19) | (mode<<16);
+ */
+
+ *cmd++ = 0; /* ord1 */
drawctxt->constant_load_commands[2].hostptr = cmd;
drawctxt->constant_load_commands[2].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
+ base = drawctxt->gpustate.gpuaddr (ALU constant shadow base)
+ offset = SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET
-
+ From register spec:
+ SP_FS_OBJ_OFFSET_REG.CONSTOBJECTSTARTOFFSET [16:24]: Constant object
+ start offset in on chip RAM,
+ 128bit aligned
+
+ ord2 = base + offset | 1
+ Because of the base alignment we can use
+ ord2 = base | offset | 1
+ */
+ *cmd++ = 0; /* ord2 */
+
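The two "From fixup" blocks above reduce to one packing rule for the first CP_LOAD_STATE ordinal. A small sketch (helper name hypothetical) of the value the restore fixup later writes into constant_load_commands[0]/[1]:

/* ord1 = (numunits << 22) | (stateblock << 19) | (mode << 16) */
static unsigned int load_state_ord1(unsigned int numunits, unsigned int stateblock,
                                    unsigned int mode)
{
        return (numunits << 22) | (stateblock << 19) | (mode << 16);
}

/* e.g. vertex constants: mode 4 (indirect), state block 4,
 * numunits = SP_VS_CTRL_REG1.VSCONSTLENGTH * 2 (256-bit units) */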
+ /* Restore VS texture memory objects */
stateblock = 0;
statetype = 1;
numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
@@ -1877,7 +2067,7 @@
*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MEM_OBJECTS)
& 0xfffffffc) | statetype;
-
+ /* Restore VS texture mipmap addresses */
stateblock = 1;
statetype = 1;
numunits = TEX_SIZE_MIPMAP / 4;
@@ -1886,7 +2076,7 @@
*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_MIPMAP)
& 0xfffffffc) | statetype;
-
+ /* Restore VS texture sampler objects */
stateblock = 0;
statetype = 0;
numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
@@ -1895,7 +2085,7 @@
*cmd++ = ((drawctxt->gpustate.gpuaddr + VS_TEX_OFFSET_SAMPLER_OBJ)
& 0xfffffffc) | statetype;
-
+ /* Restore FS texture memory objects */
stateblock = 2;
statetype = 1;
numunits = (TEX_SIZE_MEM_OBJECTS / 7) / 4;
@@ -1904,7 +2094,7 @@
*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MEM_OBJECTS)
& 0xfffffffc) | statetype;
-
+ /* Restore FS texture mipmap addresses */
stateblock = 3;
statetype = 1;
numunits = TEX_SIZE_MIPMAP / 4;
@@ -1913,7 +2103,7 @@
*cmd++ = ((drawctxt->gpustate.gpuaddr + FS_TEX_OFFSET_MIPMAP)
& 0xfffffffc) | statetype;
-
+ /* Restore FS texture sampler objects */
stateblock = 2;
statetype = 0;
numunits = (TEX_SIZE_SAMPLER_OBJ / 2) / 4;
@@ -1932,32 +2122,60 @@
unsigned int *cmd = tmp_ctx.cmd;
unsigned int *start = cmd;
-
+ /* Vertex shader */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[0].gpuaddr >> 2;
*cmd++ = 1;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
drawctxt->shader_load_commands[0].hostptr = cmd;
drawctxt->shader_load_commands[0].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
+
+ mode = 4 (indirect)
+ stateblock = 4 (Vertex shader)
+ numunits = SP_VS_CTRL_REG0.VS_LENGTH
+
+ From regspec:
+ SP_VS_CTRL_REG0.VS_LENGTH [31:24]: VS length, unit = 256bits.
+ If bit31 is 1, it means overflow
+ or any long shader.
+
+ ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
+ */
+ *cmd++ = 0; /*ord1 */
*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET) & 0xfffffffc;
-
+ /* Fragment shader */
*cmd++ = cp_type3_packet(CP_COND_EXEC, 4);
*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
*cmd++ = drawctxt->cond_execs[1].gpuaddr >> 2;
*cmd++ = 1;
- *cmd++ = 3;
+ *cmd++ = 3; /* EXEC_COUNT */
*cmd++ = cp_type3_packet(CP_LOAD_STATE, 2);
drawctxt->shader_load_commands[1].hostptr = cmd;
drawctxt->shader_load_commands[1].gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
- *cmd++ = 0;
+ /*
+ From fixup:
+
+ mode = 4 (indirect)
+ stateblock = 6 (Fragment shader)
+ numunits = SP_FS_CTRL_REG0.FS_LENGTH
+
+ From regspec:
+ SP_FS_CTRL_REG0.FS_LENGTH [31:24]: FS length, unit = 256bits.
+ If bit31 is 1, it means overflow
+ or any long shader.
+
+ ord1 = (numunits<<22) | (stateblock<<19) | (mode<<11)
+ */
+ *cmd++ = 0; /*ord1 */
*cmd++ = (drawctxt->gpustate.gpuaddr + SHADER_OFFSET
+ (SHADER_SHADOW_SIZE / 2)) & 0xfffffffc;
@@ -1978,12 +2196,13 @@
= virt2gpu(cmd, &drawctxt->gpustate);
*cmd++ = 0;
-
+ /* Create indirect buffer command for above command sequence */
create_ib1(drawctxt, drawctxt->hlsqcontrol_restore, start, cmd);
tmp_ctx.cmd = cmd;
}
+/* IB that modifies the shader and constant sizes and offsets in restore IBs. */
static void build_restore_fixup_cmds(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
@@ -1991,7 +2210,7 @@
unsigned int *start = cmd;
#ifdef GSL_CONTEXT_SWITCH_CPU_SYNC
-
+ /* Save shader sizes */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_VS_CTRL_REG0;
*cmd++ = drawctxt->shader_load_commands[0].gpuaddr;
@@ -2000,7 +2219,7 @@
*cmd++ = A3XX_SP_FS_CTRL_REG0;
*cmd++ = drawctxt->shader_load_commands[1].gpuaddr;
-
+ /* Save constant sizes */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_VS_CTRL_REG1;
*cmd++ = drawctxt->constant_load_commands[0].gpuaddr;
@@ -2009,12 +2228,12 @@
*cmd++ = A3XX_SP_FS_CTRL_REG1;
*cmd++ = drawctxt->constant_load_commands[1].gpuaddr;
-
+ /* Save constant offsets */
*cmd++ = cp_type3_packet(CP_REG_TO_MEM, 2);
*cmd++ = A3XX_SP_FS_OBJ_OFFSET_REG;
*cmd++ = drawctxt->constant_load_commands[2].gpuaddr;
#else
-
+ /* Save shader sizes */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG0, 0x7f000000,
30, (4 << 19) | (4 << 16),
drawctxt->shader_load_commands[0].gpuaddr);
@@ -2023,7 +2242,7 @@
30, (6 << 19) | (4 << 16),
drawctxt->shader_load_commands[1].gpuaddr);
-
+ /* Save constant sizes */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
23, (4 << 19) | (4 << 16),
drawctxt->constant_load_commands[0].gpuaddr);
@@ -2032,19 +2251,21 @@
23, (6 << 19) | (4 << 16),
drawctxt->constant_load_commands[1].gpuaddr);
-
+ /* Modify constant restore conditionals */
cmd = rmw_regtomem(cmd, A3XX_SP_VS_CTRL_REG1, 0x000003ff,
0, 0, drawctxt->cond_execs[2].gpuaddr);
cmd = rmw_regtomem(cmd, A3XX_SP_FS_CTRL_REG1, 0x000003ff,
0, 0, drawctxt->cond_execs[3].gpuaddr);
-
+ /* Save fragment constant shadow offset */
cmd = rmw_regtomem(cmd, A3XX_SP_FS_OBJ_OFFSET_REG, 0x00ff0000,
18, (drawctxt->gpustate.gpuaddr & 0xfffffe00) | 1,
drawctxt->constant_load_commands[2].gpuaddr);
#endif
+ /* Use mask value to avoid flushing HLSQ which would cause the HW to
+ discard all the shader data */
cmd = rmw_regtomem(cmd, A3XX_HLSQ_CONTROL_0_REG, 0x9ffffdff,
0, 0, drawctxt->hlsqcontrol_restore_commands[0].gpuaddr);
@@ -2071,6 +2292,7 @@
return 0;
}
+/* create buffers for saving/restoring registers, constants, & GMEM */
static int a3xx_create_gmem_shadow(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt)
{
@@ -2106,6 +2328,12 @@
{
int ret;
+ /*
+ * Allocate memory for the GPU state and the context commands.
+ * Despite the name, this is much more than just storage for
+ * the gpustate. This contains command space for gmem save
+ * and texture and vertex buffer storage too.
+ */
ret = kgsl_allocate(&drawctxt->gpustate,
drawctxt->pagetable, CONTEXT_SIZE);
@@ -2139,7 +2367,7 @@
{
struct kgsl_device *device = &adreno_dev->dev;
- if (context == NULL)
+ if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
return;
if (context->flags & CTXT_FLAGS_GPU_HANG)
@@ -2147,17 +2375,17 @@
"Current active context has caused gpu hang\n");
if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-
+ /* Fixup self modifying IBs for save operations */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);
-
+ /* save registers and constants. */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->regconstant_save, 3);
if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
-
+ /* Save shader instructions */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);
@@ -2167,6 +2395,10 @@
if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
(context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+ /*
+ * Save GMEM (note: changes shader. Shader must
+ * already be saved.)
+ */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_PMODE,
@@ -2183,7 +2415,7 @@
unsigned int cmds[5];
if (context == NULL) {
-
+ /* No context - set the default pagetable and that's it */
kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
adreno_dev->drawctxt_active->id);
return;
@@ -2201,6 +2433,10 @@
cmds, 5);
kgsl_mmu_setstate(&device->mmu, context->pagetable, context->id);
+ /*
+ * Restore GMEM. (note: changes shader.
+ * Shader must not already be restored.)
+ */
if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
adreno_ringbuffer_issuecmds(device, context,
@@ -2214,7 +2450,7 @@
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);
-
+ /* Fixup self modifying IBs for restore operations */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->restore_fixup, 3);
@@ -2228,7 +2464,7 @@
KGSL_CMD_FLAGS_NONE,
context->shader_restore, 3);
-
+ /* Restore HLSQ_CONTROL_0 register */
adreno_ringbuffer_issuecmds(device, context,
KGSL_CMD_FLAGS_NONE,
context->hlsqcontrol_restore, 3);
@@ -2239,7 +2475,7 @@
struct adreno_ringbuffer *rb)
{
unsigned int *cmds, cmds_gpu;
- cmds = adreno_ringbuffer_allocspace(rb, 18);
+ cmds = adreno_ringbuffer_allocspace(rb, NULL, 18);
cmds_gpu = rb->buffer_desc.gpuaddr + sizeof(uint) * (rb->wptr - 18);
GSL_RB_WRITE(cmds, cmds_gpu, cp_type3_packet(CP_ME_INIT, 17));
@@ -2257,7 +2493,7 @@
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000001);
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
-
+ /* Protected mode control - turned off for A3XX */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
@@ -2276,6 +2512,10 @@
adreno_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
+ /*
+ * Return the word address of the erroring register so that it
+ * matches the register specification
+ */
KGSL_DRV_CRIT(device,
"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
@@ -2283,7 +2523,7 @@
(reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
(reg >> 24) & 0x3);
-
+ /* Clear the error */
adreno_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
return;
}
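Worked example (status value hypothetical): if A3XX_RBBM_AHB_ERROR_STATUS read back 0x01234570, the message above would report addr=d15c (byte address 0x34570 converted to a word address by the >> 2) and ports=2:1 taken from bits [21:20] and [25:24].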
@@ -2336,23 +2576,10 @@
{
struct kgsl_device *device = &adreno_dev->dev;
- if (irq == A3XX_INT_CP_RB_INT) {
- unsigned int context_id;
- kgsl_sharedmem_readl(&device->memstore, &context_id,
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- current_context));
- if (context_id < KGSL_MEMSTORE_MAX) {
- kgsl_sharedmem_writel(&device->memstore,
- KGSL_MEMSTORE_OFFSET(context_id,
- ts_cmp_enable), 0);
- wmb();
- }
- KGSL_CMD_WARN(device, "ringbuffer rb interrupt\n");
- }
-
+ /* Wake up everybody waiting for the interrupt */
wake_up_interruptible_all(&device->wait_queue);
-
+ /* Schedule work to free mem and issue ibs */
queue_work(device->work_queue, &device->ts_expired_ws);
atomic_notifier_call_chain(&device->ts_notifier_list,
@@ -2378,33 +2605,33 @@
static struct {
void (*func)(struct adreno_device *, int);
} a3xx_irq_funcs[] = {
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(a3xx_cp_callback),
- A3XX_IRQ_CALLBACK(a3xx_cp_callback),
- A3XX_IRQ_CALLBACK(a3xx_cp_callback),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(NULL),
- A3XX_IRQ_CALLBACK(a3xx_err_callback),
-
+ A3XX_IRQ_CALLBACK(NULL), /* 0 - RBBM_GPU_IDLE */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 1 - RBBM_AHB_ERROR */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 3 - RBBM_ME_MS_TIMEOUT */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 4 - RBBM_PFP_MS_TIMEOUT */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 5 - RBBM_ATB_BUS_OVERFLOW */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 6 - RBBM_VFD_ERROR */
+ A3XX_IRQ_CALLBACK(NULL), /* 7 - CP_SW */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 8 - CP_T0_PACKET_IN_IB */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 9 - CP_OPCODE_ERROR */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 10 - CP_RESERVED_BIT_ERROR */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 11 - CP_HW_FAULT */
+ A3XX_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */
+ A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 13 - CP_IB2_INT */
+ A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 14 - CP_IB1_INT */
+ A3XX_IRQ_CALLBACK(a3xx_cp_callback), /* 15 - CP_RB_INT */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 16 - CP_REG_PROTECT_FAULT */
+ A3XX_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+ A3XX_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */
+ A3XX_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */
+ A3XX_IRQ_CALLBACK(NULL), /* 20 - CP_CACHE_FLUSH_TS */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 21 - CP_AHB_ERROR_FAULT */
+ A3XX_IRQ_CALLBACK(NULL), /* 22 - Unused */
+ A3XX_IRQ_CALLBACK(NULL), /* 23 - Unused */
+ A3XX_IRQ_CALLBACK(NULL), /* 24 - MISC_HANG_DETECT */
+ A3XX_IRQ_CALLBACK(a3xx_err_callback), /* 25 - UCHE_OOB_ACCESS */
+ /* 26 to 31 - Unused */
};
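The table is indexed by bit position in A3XX_RBBM_INT_0_STATUS, so the handler below only has to walk the set bits and dispatch. A hedged sketch of that loop, assuming the usual KGSL accessors and that A3XX_RBBM_INT_CLEAR_CMD is the acknowledge register (both assumptions; neither is shown in this hunk):

static irqreturn_t my_a3xx_irq_dispatch(struct adreno_device *adreno_dev)
{
        struct kgsl_device *device = &adreno_dev->dev;
        irqreturn_t ret = IRQ_NONE;
        unsigned int status, bit;

        adreno_regread(device, A3XX_RBBM_INT_0_STATUS, &status);

        for (bit = 0; bit < ARRAY_SIZE(a3xx_irq_funcs); bit++) {
                if ((status & BIT(bit)) && a3xx_irq_funcs[bit].func) {
                        a3xx_irq_funcs[bit].func(adreno_dev, bit);
                        ret = IRQ_HANDLED;
                }
        }

        if (status)
                adreno_regwrite(device, A3XX_RBBM_INT_CLEAR_CMD, status); /* assumed ack reg */

        return ret;
}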
static irqreturn_t a3xx_irq_handler(struct adreno_device *adreno_dev)
@@ -2448,24 +2675,33 @@
adreno_regwrite(device, A3XX_RBBM_INT_0_MASK, 0);
}
+static unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev)
+{
+ unsigned int status;
+
+ adreno_regread(&adreno_dev->dev, A3XX_RBBM_INT_0_STATUS, &status);
+
+ return (status & A3XX_INT_MASK) ? 1 : 0;
+}
+
static unsigned int a3xx_busy_cycles(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
unsigned int reg, val;
-
+ /* Freeze the counter */
adreno_regread(device, A3XX_RBBM_RBBM_CTL, &reg);
reg &= ~RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
-
+ /* Read the value */
adreno_regread(device, A3XX_RBBM_PERFCTR_PWR_1_LO, &val);
-
+ /* Reset the counter */
reg |= RBBM_RBBM_CTL_RESET_PWR_CTR1;
adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
-
+ /* Re-enable the counter */
reg &= ~RBBM_RBBM_CTL_RESET_PWR_CTR1;
reg |= RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, reg);
@@ -2478,8 +2714,9 @@
unsigned int val;
};
+/* VBIF registers start after 0x3000 so use 0x0 as end of list marker */
static struct a3xx_vbif_data a305_vbif[] = {
-
+ /* Set up 16 deep read/write request queues */
{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
@@ -2487,18 +2724,18 @@
{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
-
+ /* Enable WR-REQ */
{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
-
+ /* Set up round robin arbitration between both AXI ports */
{ A3XX_VBIF_ARB_CTL, 0x00000030 },
-
+ /* Set up AOOO */
{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
{0, 0},
};
static struct a3xx_vbif_data a320_vbif[] = {
-
+ /* Set up 16 deep read/write request queues */
{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
@@ -2506,19 +2743,47 @@
{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
-
+ /* Enable WR-REQ */
{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
-
+ /* Set up round robin arbitration between both AXI ports */
{ A3XX_VBIF_ARB_CTL, 0x00000030 },
-
+ /* Set up AOOO */
{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
-
+ /* Enable 1K sort */
{ A3XX_VBIF_ABIT_SORT, 0x000000FF },
{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
{0, 0},
};
+static struct a3xx_vbif_data a330_vbif[] = {
+ /* Set up 16 deep read/write request queues */
+ { A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 },
+ { A3XX_VBIF_IN_RD_LIM_CONF1, 0x00001818 },
+ { A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00001818 },
+ { A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00001818 },
+ { A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+ { A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 },
+ { A3XX_VBIF_IN_WR_LIM_CONF1, 0x00001818 },
+ /* Enable WR-REQ */
+ { A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
+ /* Set up round robin arbitration between both AXI ports */
+ { A3XX_VBIF_ARB_CTL, 0x00000030 },
+ /* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+ { A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001 },
+ /* Set up AOOO */
+ { A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003F },
+ { A3XX_VBIF_OUT_AXI_AOOO, 0x003F003F },
+ /* Enable 1K sort */
+ { A3XX_VBIF_ABIT_SORT, 0x0001003F },
+ { A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+ /* Disable VBIF clock gating. This allows AXI to run at a
+ * higher frequency than the GPU.
+ */
+ { A3XX_VBIF_CLKON, 1 },
+ {0, 0},
+};
+
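a3xx_start() (below) walks whichever of these per-core tables applies, writing each pair until the {0, 0} sentinel. Restated as a sketch for clarity (helper name hypothetical; field names per the struct above):

static void my_program_vbif(struct kgsl_device *device,
                            struct a3xx_vbif_data *vbif)
{
        while (vbif->reg != 0) {
                adreno_regwrite(device, vbif->reg, vbif->val);
                vbif++;
        }
}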
static void a3xx_start(struct adreno_device *adreno_dev)
{
struct kgsl_device *device = &adreno_dev->dev;
@@ -2528,6 +2793,8 @@
vbif = a305_vbif;
else if (adreno_is_a320(adreno_dev))
vbif = a320_vbif;
+ else if (adreno_is_a330(adreno_dev))
+ vbif = a330_vbif;
BUG_ON(vbif == NULL);
@@ -2536,32 +2803,66 @@
vbif++;
}
-
+ /* Make all blocks contribute to the GPU BUSY perf counter */
adreno_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
-
+ /* Tune the hysteresis counters for SP and CP idle detection */
adreno_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
adreno_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+ /* Enable the RBBM error reporting bits. This lets us get
+ useful information on failure */
adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
-
+ /* Enable AHB error reporting */
adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
-
+ /* Turn on the power counters */
adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);
+ /* Turn on hang detection - this spews a lot of useful information
+ * into the RBBM registers on a hang */
adreno_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
(1 << 16) | 0xFFF);
-
+ /* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */
+ adreno_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
+
+ /* Enable Clock gating */
adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
A3XX_RBBM_CLOCK_CTL_DEFAULT);
+ /* Set the OCMEM base address for A330 */
+ if (adreno_is_a330(adreno_dev)) {
+ adreno_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
+ (unsigned int)(adreno_dev->ocmem_base >> 14));
+ }
+
+ /* Turn on performance counters */
+ adreno_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
+
+ /*
+ * Set SP perfcounter 5 to count SP_ALU_ACTIVE_CYCLES; it counts
+ * all ALU instruction execution regardless of precision or shader ID.
+ * Set SP perfcounter 6 to count SP0_ICL1_MISSES; it counts
+ * USP L1 instruction miss requests.
+ * Set SP perfcounter 7 to count SP_FS_CFLOW_INSTRUCTIONS; it
+ * counts USP flow control instruction execution.
+ * We will use these to augment our hang detection.
+ */
+ if (adreno_dev->fast_hang_detect) {
+ adreno_regwrite(device, A3XX_SP_PERFCOUNTER5_SELECT,
+ SP_ALU_ACTIVE_CYCLES);
+ adreno_regwrite(device, A3XX_SP_PERFCOUNTER6_SELECT,
+ SP0_ICL1_MISSES);
+ adreno_regwrite(device, A3XX_SP_PERFCOUNTER7_SELECT,
+ SP_FS_CFLOW_INSTRUCTIONS);
+ }
}
+/* Defined in adreno_a3xx_snapshot.c */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang);
@@ -2577,6 +2878,7 @@
.rb_init = a3xx_rb_init,
.irq_control = a3xx_irq_control,
.irq_handler = a3xx_irq_handler,
+ .irq_pending = a3xx_irq_pending,
.busy_cycles = a3xx_busy_cycles,
.start = a3xx_start,
.snapshot = a3xx_snapshot,
diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c
index 14cdaaa..d9d5ec8 100644
--- a/drivers/gpu/msm/adreno_a3xx_snapshot.c
+++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -113,6 +113,12 @@
header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
header->size = size;
+ /*
+ * Read the firmware from the GPU rather than use our cache in order to
+ * try to catch mis-programming or corruption in the hardware. We do
+ * use the cached version of the size, however, instead of trying to
+ * maintain constantly changing hardcoded constants.
+ */
adreno_regwrite(device, REG_CP_ME_RAM_RADDR, 0x0);
for (i = 0; i < size; i++)
@@ -137,6 +143,12 @@
header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
header->size = size;
+ /*
+ * Read the firmware from the GPU rather than use our cache in order to
+ * try to catch mis-programming or corruption in the hardware. We do
+ * use the cached version of the size, however, instead of trying to
+ * maintain constantly changing hardcoded constants.
+ */
kgsl_regwrite(device, A3XX_CP_PFP_UCODE_ADDR, 0x0);
for (i = 0; i < size; i++)
adreno_regread(device, A3XX_CP_PFP_UCODE_DATA, &data[i]);
@@ -144,28 +156,68 @@
return DEBUG_SECTION_SZ(size);
}
-#define CP_ROQ_SIZE 128
+/* This is the ROQ buffer size on both the A305 and A320 */
+#define A320_CP_ROQ_SIZE 128
+/* This is the ROQ buffer size on the A330 */
+#define A330_CP_ROQ_SIZE 512
static int a3xx_snapshot_cp_roq(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
struct kgsl_snapshot_debug *header = snapshot;
unsigned int *data = snapshot + sizeof(*header);
- int i;
+ int i, size;
- if (remain < DEBUG_SECTION_SZ(CP_ROQ_SIZE)) {
+ /* The size of the ROQ buffer is core dependent */
+ size = adreno_is_a330(adreno_dev) ?
+ A330_CP_ROQ_SIZE : A320_CP_ROQ_SIZE;
+
+ if (remain < DEBUG_SECTION_SZ(size)) {
SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
return 0;
}
header->type = SNAPSHOT_DEBUG_CP_ROQ;
- header->size = CP_ROQ_SIZE;
+ header->size = size;
adreno_regwrite(device, A3XX_CP_ROQ_ADDR, 0x0);
- for (i = 0; i < CP_ROQ_SIZE; i++)
+ for (i = 0; i < size; i++)
adreno_regread(device, A3XX_CP_ROQ_DATA, &data[i]);
- return DEBUG_SECTION_SZ(CP_ROQ_SIZE);
+ return DEBUG_SECTION_SZ(size);
+}
+
+#define A330_CP_MERCIU_QUEUE_SIZE 32
+
+static int a330_snapshot_cp_merciu(struct kgsl_device *device, void *snapshot,
+ int remain, void *priv)
+{
+ struct kgsl_snapshot_debug *header = snapshot;
+ unsigned int *data = snapshot + sizeof(*header);
+ int i, size;
+
+ /* The MERCIU data is two dwords per entry */
+ size = A330_CP_MERCIU_QUEUE_SIZE << 1;
+
+ if (remain < DEBUG_SECTION_SZ(size)) {
+ SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG");
+ return 0;
+ }
+
+ header->type = SNAPSHOT_DEBUG_CP_MERCIU;
+ header->size = size;
+
+ adreno_regwrite(device, A3XX_CP_MERCIU_ADDR, 0x0);
+
+ for (i = 0; i < A330_CP_MERCIU_QUEUE_SIZE; i++) {
+ adreno_regread(device, A3XX_CP_MERCIU_DATA,
+ &data[(i * 2)]);
+ adreno_regread(device, A3XX_CP_MERCIU_DATA2,
+ &data[(i * 2) + 1]);
+ }
+
+ return DEBUG_SECTION_SZ(size);
}
#define DEBUGFS_BLOCK_SIZE 0x40
@@ -245,52 +297,97 @@
return snapshot;
}
+static void _snapshot_a3xx_regs(struct kgsl_snapshot_registers *regs,
+ struct kgsl_snapshot_registers_list *list)
+{
+ regs[list->count].regs = (unsigned int *) a3xx_registers;
+ regs[list->count].count = a3xx_registers_count;
+ list->count++;
+}
+
+static void _snapshot_hlsq_regs(struct kgsl_snapshot_registers *regs,
+ struct kgsl_snapshot_registers_list *list,
+ struct adreno_device *adreno_dev)
+{
+ /* HLSQ specific registers */
+ /*
+ * Don't dump any a3xx HLSQ registers just yet. Reading the HLSQ
+ * registers can cause the device to hang if the HLSQ block is
+ * busy. Add specific checks for each a3xx core as the requirements
+ * are discovered. Disable by default for now.
+ */
+ if (!adreno_is_a3xx(adreno_dev)) {
+ regs[list->count].regs = (unsigned int *) a3xx_hlsq_registers;
+ regs[list->count].count = a3xx_hlsq_registers_count;
+ list->count++;
+ }
+}
+
+static void _snapshot_a330_regs(struct kgsl_snapshot_registers *regs,
+ struct kgsl_snapshot_registers_list *list)
+{
+ /* For A330, append the additional list of new registers to grab */
+ regs[list->count].regs = (unsigned int *) a330_registers;
+ regs[list->count].count = a330_registers_count;
+ list->count++;
+}
+
+/* A3XX GPU snapshot function - this is where all of the A3XX specific
+ * bits and pieces are grabbed into the snapshot memory
+ */
void *a3xx_snapshot(struct adreno_device *adreno_dev, void *snapshot,
int *remain, int hang)
{
struct kgsl_device *device = &adreno_dev->dev;
- struct kgsl_snapshot_registers regs;
+ struct kgsl_snapshot_registers_list list;
+ struct kgsl_snapshot_registers regs[5];
- regs.regs = (unsigned int *) a3xx_registers;
- regs.count = a3xx_registers_count;
+ list.registers = regs;
+ list.count = 0;
-
+ /* Disable Clock gating temporarily for the debug bus to work */
adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL, 0x00);
-
+ /* Store relevant registers in list to snapshot */
+ _snapshot_a3xx_regs(regs, &list);
+ _snapshot_hlsq_regs(regs, &list, adreno_dev);
+ if (adreno_is_a330(adreno_dev))
+ _snapshot_a330_regs(regs, &list);
+
+ /* Master set of (non debug) registers */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_REGS, snapshot, remain,
- kgsl_snapshot_dump_regs, &regs);
+ kgsl_snapshot_dump_regs, &list);
-
+ /* CP_STATE_DEBUG indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_CP_STATE_DEBUG_INDEX,
REG_CP_STATE_DEBUG_DATA, 0x0, 0x14);
-
+ /* CP_ME indexed registers */
snapshot = kgsl_snapshot_indexed_registers(device, snapshot,
remain, REG_CP_ME_CNTL, REG_CP_ME_STATUS,
64, 44);
-
+ /* VPC memory */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a3xx_snapshot_vpc_memory, NULL);
-
+ /* CP MEQ */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a3xx_snapshot_cp_meq, NULL);
-
+ /* Shader working/shadow memory */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a3xx_snapshot_shader_memory, NULL);
-
-
+ /* CP PFP and PM4 */
+ /* Reading these will hang the GPU if it isn't already hung */
if (hang) {
snapshot = kgsl_snapshot_add_section(device,
@@ -302,14 +399,20 @@
a3xx_snapshot_cp_pm4_ram, NULL);
}
-
+ /* CP ROQ */
snapshot = kgsl_snapshot_add_section(device,
KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
a3xx_snapshot_cp_roq, NULL);
+ if (adreno_is_a330(adreno_dev)) {
+ snapshot = kgsl_snapshot_add_section(device,
+ KGSL_SNAPSHOT_SECTION_DEBUG, snapshot, remain,
+ a330_snapshot_cp_merciu, NULL);
+ }
+
snapshot = a3xx_snapshot_debugbus(device, snapshot, remain);
-
+ /* Enable Clock gating */
adreno_regwrite(device, A3XX_RBBM_CLOCK_CTL,
A3XX_RBBM_CLOCK_CTL_DEFAULT);
diff --git a/drivers/gpu/msm/adreno_a3xx_trace.c b/drivers/gpu/msm/adreno_a3xx_trace.c
index 80756c6..325b068 100644
--- a/drivers/gpu/msm/adreno_a3xx_trace.c
+++ b/drivers/gpu/msm/adreno_a3xx_trace.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -14,6 +14,7 @@
#include "kgsl.h"
#include "adreno.h"
+/* Instantiate tracepoints */
#define CREATE_TRACE_POINTS
#include "a3xx_reg.h"
#include "adreno_a3xx_trace.h"
diff --git a/drivers/gpu/msm/adreno_a3xx_trace.h b/drivers/gpu/msm/adreno_a3xx_trace.h
index e4b4d11..d48faf4 100644
--- a/drivers/gpu/msm/adreno_a3xx_trace.h
+++ b/drivers/gpu/msm/adreno_a3xx_trace.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,9 @@
struct kgsl_device;
+/*
+ * Tracepoint for a3xx irq. Includes status info
+ */
TRACE_EVENT(kgsl_a3xx_irq_status,
TP_PROTO(struct kgsl_device *device, unsigned int status),
@@ -80,6 +83,7 @@
)
);
-#endif
+#endif /* _ADRENO_A3XX_TRACE_H */
+/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
index 70eb2db..890c8a1 100644
--- a/drivers/gpu/msm/adreno_debugfs.c
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,67 +18,11 @@
#include <linux/io.h>
#include "kgsl.h"
-#include "adreno_postmortem.h"
#include "adreno.h"
#include "a2xx_reg.h"
unsigned int kgsl_cff_dump_enable;
-int adreno_pm_regs_enabled;
-int adreno_pm_ib_enabled;
-
-static struct dentry *pm_d_debugfs;
-
-static int pm_dump_set(void *data, u64 val)
-{
- struct kgsl_device *device = data;
-
- if (val) {
- mutex_lock(&device->mutex);
- adreno_postmortem_dump(device, 1);
- mutex_unlock(&device->mutex);
- }
-
- return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(pm_dump_fops,
- NULL,
- pm_dump_set, "%llu\n");
-
-static int pm_regs_enabled_set(void *data, u64 val)
-{
- adreno_pm_regs_enabled = val ? 1 : 0;
- return 0;
-}
-
-static int pm_regs_enabled_get(void *data, u64 *val)
-{
- *val = adreno_pm_regs_enabled;
- return 0;
-}
-
-static int pm_ib_enabled_set(void *data, u64 val)
-{
- adreno_pm_ib_enabled = val ? 1 : 0;
- return 0;
-}
-
-static int pm_ib_enabled_get(void *data, u64 *val)
-{
- *val = adreno_pm_ib_enabled;
- return 0;
-}
-
-
-DEFINE_SIMPLE_ATTRIBUTE(pm_regs_enabled_fops,
- pm_regs_enabled_get,
- pm_regs_enabled_set, "%llu\n");
-
-DEFINE_SIMPLE_ATTRIBUTE(pm_ib_enabled_fops,
- pm_ib_enabled_get,
- pm_ib_enabled_set, "%llu\n");
-
static int kgsl_cff_dump_enable_set(void *data, u64 val)
{
@@ -116,23 +60,43 @@
&adreno_dev->wait_timeout);
debugfs_create_u32("ib_check", 0644, device->d_debugfs,
&adreno_dev->ib_check_level);
-
-
+	/* By default, enable fast hang detection */
adreno_dev->fast_hang_detect = 1;
debugfs_create_u32("fast_hang_detect", 0644, device->d_debugfs,
&adreno_dev->fast_hang_detect);
-
+ /* Top level switch to enable/disable userspace FT control */
+ adreno_dev->ft_user_control = 0;
+ debugfs_create_u32("ft_user_control", 0644, device->d_debugfs,
+ &adreno_dev->ft_user_control);
+ /*
+ * FT policy can be set to any of the options below.
+ * KGSL_FT_DISABLE -> BIT(0) Set to disable FT
+ * KGSL_FT_REPLAY -> BIT(1) Set to enable replay
+ * KGSL_FT_SKIPIB -> BIT(2) Set to skip IB
+ * KGSL_FT_SKIPFRAME -> BIT(3) Set to skip frame
+ * by default set FT policy to KGSL_FT_DEFAULT_POLICY
+ */
+ adreno_dev->ft_policy = KGSL_FT_DEFAULT_POLICY;
+ debugfs_create_u32("ft_policy", 0644, device->d_debugfs,
+ &adreno_dev->ft_policy);
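+	/*
+	 * For example (illustrative only): writing 0x6 to this node, i.e.
+	 * KGSL_FT_REPLAY | KGSL_FT_SKIPIB per the bit layout above, selects
+	 * both the replay and skip-IB recovery steps.
+	 */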
- pm_d_debugfs = debugfs_create_dir("postmortem", device->d_debugfs);
+ /* By default enable long IB detection */
+ adreno_dev->long_ib_detect = 1;
+ debugfs_create_u32("long_ib_detect", 0644, device->d_debugfs,
+ &adreno_dev->long_ib_detect);
- if (IS_ERR(pm_d_debugfs))
- return;
-
- debugfs_create_file("dump", 0600, pm_d_debugfs, device,
- &pm_dump_fops);
- debugfs_create_file("regs_enabled", 0644, pm_d_debugfs, device,
- &pm_regs_enabled_fops);
- debugfs_create_file("ib_enabled", 0644, pm_d_debugfs, device,
- &pm_ib_enabled_fops);
+ /*
+ * FT pagefault policy can be set to any of the options below.
+ * KGSL_FT_PAGEFAULT_INT_ENABLE -> BIT(0) set to enable pagefault INT
+ * KGSL_FT_PAGEFAULT_GPUHALT_ENABLE -> BIT(1) Set to enable GPU HALT on
+ * pagefaults. This stalls the GPU on a pagefault on IOMMU v1 HW.
+ * KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE -> BIT(2) Set to log only one
+ * pagefault per page.
+ * KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT -> BIT(3) Set to log only one
+ * pagefault per INT.
+ */
+ adreno_dev->ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY;
+ debugfs_create_u32("ft_pagefault_policy", 0644, device->d_debugfs,
+ &adreno_dev->ft_pf_policy);
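+	/*
+	 * Illustration (not prescriptive): a value of BIT(0) | BIT(2) would
+	 * enable the pagefault interrupt and limit logging to one message per
+	 * faulting page, per the bit definitions above.
+	 */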
}
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index 4db7258..6fbcdee 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,6 +19,7 @@
#define KGSL_INIT_REFTIMESTAMP 0x7FFFFFFF
+/* quad for copying GMEM to context shadow */
#define QUAD_LEN 12
#define QUAD_RESTORE_LEN 14
@@ -46,7 +47,15 @@
0x3f800000, 0x00000000
};
+/*
+ * Helper functions
+ * These are global helper functions used by the GPUs during context switch
+ */
+/**
+ * uint2float - convert a uint to IEEE754 single precision float
+ * @ uintval - value to convert
+ */
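+/*
+ * Worked example (for reference): uint2float(6) computes exp = ilog2(6) = 2,
+ * frac = (6 & ~(1 << 2)) << 21 = 0x00400000 and exp = (2 + 127) << 23 =
+ * 0x40800000, returning 0x40C00000, the IEEE754 encoding of 6.0f.
+ */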
unsigned int uint2float(unsigned int uintval)
{
@@ -57,11 +66,11 @@
exp = ilog2(uintval);
-
+ /* Calculate fraction */
if (23 > exp)
frac = (uintval & (~(1 << exp))) << (23 - exp);
-
+ /* Exp is biased by 127 and shifted 23 bits */
exp = (exp + 127) << 23;
return exp | frac;
@@ -69,7 +78,7 @@
static void set_gmem_copy_quad(struct gmem_shadow_t *shadow)
{
-
+ /* set vertex buffer values */
gmem_copy_quad[1] = uint2float(shadow->height);
gmem_copy_quad[3] = uint2float(shadow->width);
gmem_copy_quad[4] = uint2float(shadow->height);
@@ -86,26 +95,33 @@
TEXCOORD_LEN << 2);
}
+/**
+ * build_quad_vtxbuff - Create a quad for saving/restoring GMEM
+ * @ drawctxt - Pointer to the draw context that owns the quad buffers
+ * @ shadow - Pointer to the GMEM shadow structure
+ * @ incmd - Pointer to pointer to the temporary command buffer
+ */
+/* quad for saving/restoring gmem */
void build_quad_vtxbuff(struct adreno_context *drawctxt,
struct gmem_shadow_t *shadow, unsigned int **incmd)
{
unsigned int *cmd = *incmd;
-
+ /* quad vertex buffer location (in GPU space) */
shadow->quad_vertices.hostptr = cmd;
shadow->quad_vertices.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
cmd += QUAD_LEN;
-
+ /* Used by A3XX, but define for both to make the code easier */
shadow->quad_vertices_restore.hostptr = cmd;
shadow->quad_vertices_restore.gpuaddr =
virt2gpu(cmd, &drawctxt->gpustate);
cmd += QUAD_RESTORE_LEN;
-
+ /* tex coord buffer location (in GPU space) */
shadow->quad_texcoords.hostptr = cmd;
shadow->quad_texcoords.gpuaddr = virt2gpu(cmd, &drawctxt->gpustate);
@@ -115,12 +131,23 @@
*incmd = cmd;
}
+/**
+ * adreno_drawctxt_create - create a new adreno draw context
+ * @device - KGSL device to create the context on
+ * @pagetable - Pagetable for the context
+ * @context- Generic KGSL context structure
+ * @flags - flags for the context (passed from user space)
+ *
+ * Create a new draw context for the 3D core. Return 0 on success,
+ * or error code on failure.
+ */
int adreno_drawctxt_create(struct kgsl_device *device,
struct kgsl_pagetable *pagetable,
struct kgsl_context *context, uint32_t flags)
{
struct adreno_context *drawctxt;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
int ret;
drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
@@ -128,9 +155,12 @@
if (drawctxt == NULL)
return -ENOMEM;
+ drawctxt->pid = task_pid_nr(current);
+ strlcpy(drawctxt->pid_name, current->comm, TASK_COMM_LEN);
drawctxt->pagetable = pagetable;
drawctxt->bin_base_offset = 0;
drawctxt->id = context->id;
+ rb->timestamp[context->id] = 0;
if (flags & KGSL_CONTEXT_PREAMBLE)
drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
@@ -141,6 +171,17 @@
if (flags & KGSL_CONTEXT_PER_CONTEXT_TS)
drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
+ if (flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+ if (!(flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
+ ret = -EINVAL;
+ goto err;
+ }
+ drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
+ }
+
+ if (flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+ drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE;
+
ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
if (ret)
goto err;
@@ -148,6 +189,12 @@
kgsl_sharedmem_writel(&device->memstore,
KGSL_MEMSTORE_OFFSET(drawctxt->id, ref_wait_ts),
KGSL_INIT_REFTIMESTAMP);
+ kgsl_sharedmem_writel(&device->memstore,
+ KGSL_MEMSTORE_OFFSET(drawctxt->id, ts_cmp_enable), 0);
+ kgsl_sharedmem_writel(&device->memstore,
+ KGSL_MEMSTORE_OFFSET(drawctxt->id, soptimestamp), 0);
+ kgsl_sharedmem_writel(&device->memstore,
+ KGSL_MEMSTORE_OFFSET(drawctxt->id, eoptimestamp), 0);
context->devctxt = drawctxt;
return 0;
@@ -156,7 +203,16 @@
return ret;
}
+/**
+ * adreno_drawctxt_destroy - destroy a draw context
+ * @device - KGSL device that owns the context
+ * @context- Generic KGSL context container for the context
+ *
+ * Destroy an existing draw context.
+ */
+/* destroy a drawing context */
void adreno_drawctxt_destroy(struct kgsl_device *device,
struct kgsl_context *context)
@@ -168,15 +224,18 @@
return;
drawctxt = context->devctxt;
-
+ /* deactivate context */
if (adreno_dev->drawctxt_active == drawctxt) {
+ /* no need to save GMEM or shader, the context is
+ * being destroyed.
+ */
drawctxt->flags &= ~(CTXT_FLAGS_GMEM_SAVE |
CTXT_FLAGS_SHADER_SAVE |
CTXT_FLAGS_GMEM_SHADOW |
CTXT_FLAGS_STATE_SHADOW);
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- device->current_process_priv = NULL;
-#endif
+
+ drawctxt->flags |= CTXT_FLAGS_BEING_DESTROYED;
+
adreno_drawctxt_switch(adreno_dev, NULL, 0);
}
@@ -194,6 +253,14 @@
context->devctxt = NULL;
}
+/**
+ * adreno_drawctxt_set_bin_base_offset - set bin base offset for the context
+ * @device - KGSL device that owns the context
+ * @context- Generic KGSL context container for the context
+ * @offset - Offset to set
+ *
+ * Set the bin base offset for A2XX devices. Not valid for A3XX devices.
+ */
void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device,
struct kgsl_context *context,
@@ -205,6 +272,14 @@
drawctxt->bin_base_offset = offset;
}
+/**
+ * adreno_drawctxt_switch - switch the current draw context
+ * @adreno_dev - The 3D device that owns the context
+ * @drawctxt - the 3D context to switch to
+ * @flags - Flags to accompany the switch (from user space)
+ *
+ * Switch the current draw context
+ */
void adreno_drawctxt_switch(struct adreno_device *adreno_dev,
struct adreno_context *drawctxt,
@@ -214,13 +289,15 @@
if (drawctxt) {
if (flags & KGSL_CONTEXT_SAVE_GMEM)
+ /* Set the flag in context so that the save is done
+ * when this context is switched out. */
drawctxt->flags |= CTXT_FLAGS_GMEM_SAVE;
else
-
+ /* Remove GMEM saving flag from the context */
drawctxt->flags &= ~CTXT_FLAGS_GMEM_SAVE;
}
-
+ /* already current? */
if (adreno_dev->drawctxt_active == drawctxt) {
if (adreno_dev->gpudev->ctxt_draw_workaround &&
adreno_is_a225(adreno_dev))
@@ -232,10 +309,10 @@
KGSL_CTXT_INFO(device, "from %p to %p flags %d\n",
adreno_dev->drawctxt_active, drawctxt, flags);
-
+ /* Save the old context */
adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active);
-
+ /* Set the new context */
adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt);
adreno_dev->drawctxt_active = drawctxt;
}
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index f66dfbb..fd60688 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,44 +13,73 @@
#ifndef __ADRENO_DRAWCTXT_H
#define __ADRENO_DRAWCTXT_H
+#include <linux/sched.h>
+
#include "adreno_pm4types.h"
#include "a2xx_reg.h"
+/* Flags */
#define CTXT_FLAGS_NOT_IN_USE 0x00000000
-#define CTXT_FLAGS_IN_USE 0x00000001
+#define CTXT_FLAGS_IN_USE BIT(0)
-#define CTXT_FLAGS_STATE_SHADOW 0x00000010
+/* state shadow memory allocated */
+#define CTXT_FLAGS_STATE_SHADOW BIT(1)
-#define CTXT_FLAGS_GMEM_SHADOW 0x00000100
-#define CTXT_FLAGS_GMEM_SAVE 0x00000200
-#define CTXT_FLAGS_GMEM_RESTORE 0x00000400
-#define CTXT_FLAGS_PREAMBLE 0x00000800
-#define CTXT_FLAGS_SHADER_SAVE 0x00002000
-#define CTXT_FLAGS_SHADER_RESTORE 0x00004000
-#define CTXT_FLAGS_GPU_HANG 0x00008000
-#define CTXT_FLAGS_NOGMEMALLOC 0x00010000
-#define CTXT_FLAGS_TRASHSTATE 0x00020000
-#define CTXT_FLAGS_PER_CONTEXT_TS 0x00040000
-#define CTXT_FLAGS_GPU_HANG_RECOVERED 0x00008000
+/* gmem shadow memory allocated */
+#define CTXT_FLAGS_GMEM_SHADOW BIT(2)
+/* gmem must be copied to shadow */
+#define CTXT_FLAGS_GMEM_SAVE BIT(3)
+/* gmem can be restored from shadow */
+#define CTXT_FLAGS_GMEM_RESTORE BIT(4)
+/* preamble packed in cmdbuffer for context switching */
+#define CTXT_FLAGS_PREAMBLE BIT(5)
+/* shader must be copied to shadow */
+#define CTXT_FLAGS_SHADER_SAVE BIT(6)
+/* shader can be restored from shadow */
+#define CTXT_FLAGS_SHADER_RESTORE BIT(7)
+/* Context has caused a GPU hang */
+#define CTXT_FLAGS_GPU_HANG BIT(8)
+/* Specifies there is no need to save GMEM */
+#define CTXT_FLAGS_NOGMEMALLOC BIT(9)
+/* Trash state for context */
+#define CTXT_FLAGS_TRASHSTATE BIT(10)
+/* per context timestamps enabled */
+#define CTXT_FLAGS_PER_CONTEXT_TS BIT(11)
+/* Context has caused a GPU hang and fault tolerance successful */
+#define CTXT_FLAGS_GPU_HANG_FT BIT(12)
+/* Context is being destroyed so dont save it */
+#define CTXT_FLAGS_BEING_DESTROYED BIT(13)
+/* User mode generated timestamps enabled */
+#define CTXT_FLAGS_USER_GENERATED_TS BIT(14)
+/* Context skip till EOF */
+#define CTXT_FLAGS_SKIP_EOF BIT(15)
+/* Context no fault tolerance */
+#define CTXT_FLAGS_NO_FAULT_TOLERANCE BIT(16)
struct kgsl_device;
struct adreno_device;
struct kgsl_device_private;
struct kgsl_context;
+/* draw context */
struct gmem_shadow_t {
- struct kgsl_memdesc gmemshadow;
+ struct kgsl_memdesc gmemshadow; /* Shadow buffer address */
+ /*
+ * 256 KB GMEM surface = 4 bytes-per-pixel x 256 pixels/row x
+ * 256 rows. Width & height must be multiples of 32 in case tiled
+ * textures are used
+ */
- enum COLORFORMATX format;
- unsigned int size;
- unsigned int width;
- unsigned int height;
- unsigned int pitch;
- unsigned int gmem_pitch;
- unsigned int *gmem_save_commands;
- unsigned int *gmem_restore_commands;
+ enum COLORFORMATX format; /* Unused on A3XX */
+ unsigned int size; /* Size of surface used to store GMEM */
+ unsigned int width; /* Width of surface used to store GMEM */
+ unsigned int height; /* Height of surface used to store GMEM */
+ unsigned int pitch; /* Pitch of surface used to store GMEM */
+ unsigned int gmem_pitch; /* Pitch value used for GMEM */
+ unsigned int *gmem_save_commands; /* Unused on A3XX */
+ unsigned int *gmem_restore_commands; /* Unused on A3XX */
unsigned int gmem_save[3];
unsigned int gmem_restore[3];
struct kgsl_memdesc quad_vertices;
@@ -59,7 +88,10 @@
};
struct adreno_context {
+ pid_t pid;
+ char pid_name[TASK_COMM_LEN];
unsigned int id;
+ unsigned int ib_gpu_time_used;
uint32_t flags;
struct kgsl_pagetable *pagetable;
struct kgsl_memdesc gpustate;
@@ -67,16 +99,16 @@
unsigned int shader_save[3];
unsigned int shader_restore[3];
-
+ /* Information of the GMEM shadow that is created in context create */
struct gmem_shadow_t context_gmem_shadow;
-
+ /* A2XX specific items */
unsigned int reg_save[3];
unsigned int shader_fixup[3];
unsigned int chicken_restore[3];
unsigned int bin_base_offset;
-
+ /* A3XX specific items */
unsigned int regconstant_save[3];
unsigned int constant_restore[3];
unsigned int hlsqcontrol_restore[3];
@@ -105,6 +137,7 @@
struct kgsl_context *context,
unsigned int offset);
+/* GPU context switch helper functions */
void build_quad_vtxbuff(struct adreno_context *drawctxt,
struct gmem_shadow_t *shadow, unsigned int **incmd);
@@ -131,8 +164,8 @@
static inline unsigned int *reg_range(unsigned int *cmd, unsigned int start,
unsigned int end)
{
- *cmd++ = CP_REG(start);
- *cmd++ = end - start + 1;
+ *cmd++ = CP_REG(start); /* h/w regs, start addr */
+ *cmd++ = end - start + 1; /* count */
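+	/* e.g. reg_range(cmd, 0x2000, 0x2003) emits CP_REG(0x2000)
+	 * (0x00040000 per the macros in adreno_pm4types.h) then a count of 4 */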
return cmd;
}
@@ -142,7 +175,7 @@
shadow->format = COLORX_8_8_8_8;
-
+ /* convert from bytes to 32-bit words */
gmem_size = (gmem_size + 3) / 4;
while ((w * h) < gmem_size) {
@@ -158,4 +191,4 @@
shadow->size = shadow->pitch * shadow->height * 4;
}
-#endif
+#endif /* __ADRENO_DRAWCTXT_H */
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index c6ebed4..a3fa312 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -22,103 +22,159 @@
#define CP_TYPE3_PKT ((unsigned int)3 << 30)
+/* type3 packets */
+/* initialize CP's micro-engine */
#define CP_ME_INIT 0x48
+/* skip N 32-bit words to get to the next packet */
#define CP_NOP 0x10
+/* indirect buffer dispatch. same as IB, but init is pipelined */
#define CP_INDIRECT_BUFFER_PFD 0x37
+/* wait for the IDLE state of the engine */
#define CP_WAIT_FOR_IDLE 0x26
+/* wait until a register or memory location is a specific value */
#define CP_WAIT_REG_MEM 0x3c
+/* wait until a register location is equal to a specific value */
#define CP_WAIT_REG_EQ 0x52
+/* wait until a register location is >= a specific value */
#define CP_WAT_REG_GTE 0x53
+/* wait until a read completes */
#define CP_WAIT_UNTIL_READ 0x5c
+/* wait until all base/size writes from an IB_PFD packet have completed */
#define CP_WAIT_IB_PFD_COMPLETE 0x5d
+/* register read/modify/write */
#define CP_REG_RMW 0x21
+/* Set binning configuration registers */
#define CP_SET_BIN_DATA 0x2f
+/* reads register in chip and writes to memory */
#define CP_REG_TO_MEM 0x3e
+/* write N 32-bit words to memory */
#define CP_MEM_WRITE 0x3d
+/* write CP_PROG_COUNTER value to memory */
#define CP_MEM_WRITE_CNTR 0x4f
+/* conditional execution of a sequence of packets */
#define CP_COND_EXEC 0x44
+/* conditional write to memory or register */
#define CP_COND_WRITE 0x45
+/* generate an event that creates a write to memory when completed */
#define CP_EVENT_WRITE 0x46
+/* generate a VS|PS_done event */
#define CP_EVENT_WRITE_SHD 0x58
+/* generate a cache flush done event */
#define CP_EVENT_WRITE_CFL 0x59
+/* generate a z_pass done event */
#define CP_EVENT_WRITE_ZPD 0x5b
+/* initiate fetch of index buffer and draw */
#define CP_DRAW_INDX 0x22
+/* draw using supplied indices in packet */
#define CP_DRAW_INDX_2 0x36
+/* initiate fetch of index buffer and binIDs and draw */
#define CP_DRAW_INDX_BIN 0x34
+/* initiate fetch of bin IDs and draw using supplied indices */
#define CP_DRAW_INDX_2_BIN 0x35
+/* begin/end initiator for viz query extent processing */
#define CP_VIZ_QUERY 0x23
+/* fetch state sub-blocks and initiate shader code DMAs */
#define CP_SET_STATE 0x25
+/* load constant into chip and to memory */
#define CP_SET_CONSTANT 0x2d
+/* load sequencer instruction memory (pointer-based) */
#define CP_IM_LOAD 0x27
+/* load sequencer instruction memory (code embedded in packet) */
#define CP_IM_LOAD_IMMEDIATE 0x2b
+/* load constants from a location in memory */
#define CP_LOAD_CONSTANT_CONTEXT 0x2e
+/* (A2x) sets binning configuration registers */
#define CP_SET_BIN_DATA 0x2f
+/* selective invalidation of state pointers */
#define CP_INVALIDATE_STATE 0x3b
+/* dynamically changes shader instruction memory partition */
#define CP_SET_SHADER_BASES 0x4A
+/* sets the 64-bit BIN_MASK register in the PFP */
#define CP_SET_BIN_MASK 0x50
+/* sets the 64-bit BIN_SELECT register in the PFP */
#define CP_SET_BIN_SELECT 0x51
+/* updates the current context, if needed */
#define CP_CONTEXT_UPDATE 0x5e
+/* generate interrupt from the command stream */
#define CP_INTERRUPT 0x40
+/* copy sequencer instruction memory to system memory */
#define CP_IM_STORE 0x2c
+/* test 2 memory locations to dword values specified */
#define CP_TEST_TWO_MEMS 0x71
+/* PFP waits until the FIFO between the PFP and the ME is empty */
#define CP_WAIT_FOR_ME 0x13
+/*
+ * for a20x
+ * program an offset that will be added to the BIN_BASE value of
+ * the 3D_DRAW_INDX_BIN packet
+ */
#define CP_SET_BIN_BASE_OFFSET 0x4B
+/*
+ * for a22x
+ * sets draw initiator flags register in PFP, gets bitwise-ORed into
+ * every draw initiator
+ */
#define CP_SET_DRAW_INIT_FLAGS 0x4B
-#define CP_SET_PROTECTED_MODE 0x5f
+#define CP_SET_PROTECTED_MODE 0x5f /* sets the register protection mode */
+/*
+ * for a3xx
+ */
-#define CP_LOAD_STATE 0x30
+#define CP_LOAD_STATE 0x30 /* load high level sequencer command */
-#define CP_COND_INDIRECT_BUFFER_PFE 0x3A
-#define CP_COND_INDIRECT_BUFFER_PFD 0x32
+/* Conditionally load a IB based on a flag */
+#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */
+#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */
+/* Load a buffer with pre-fetch enabled */
#define CP_INDIRECT_BUFFER_PFE 0x3F
#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000
@@ -128,6 +184,7 @@
#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000
#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002
+/* packet header building macros */
#define cp_type0_packet(regindx, cnt) \
(CP_TYPE0_PKT | (((cnt)-1) << 16) | ((regindx) & 0x7FFF))
@@ -152,6 +209,10 @@
#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF)
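+/*
+ * Round-trip example (assuming CP_TYPE0_PKT is ((unsigned int)0 << 30),
+ * analogous to CP_TYPE3_PKT above): cp_type0_packet(0x01C9, 4) builds
+ * 0x000301C9, from which type0_pkt_size() recovers 4 and
+ * type0_pkt_offset() recovers 0x01C9.
+ */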
+/*
+ * Check both for the type3 opcode and make sure that the reserved bits [1:7]
+ * and 15 are 0
+ */
#define pkt_is_type3(pkt) \
((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \
@@ -160,15 +221,19 @@
#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+/* packet headers */
#define CP_HDR_ME_INIT cp_type3_packet(CP_ME_INIT, 18)
#define CP_HDR_INDIRECT_BUFFER_PFD cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2)
#define CP_HDR_INDIRECT_BUFFER_PFE cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2)
+/* dword base address of the GFX decode space */
#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000)))
+/* gmem command buffer length */
#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg)))
+/* Return 1 if the command is an indirect buffer of any kind */
static inline int adreno_cmd_is_ib(unsigned int cmd)
{
return (cmd == cp_type3_packet(CP_INDIRECT_BUFFER_PFE, 2) ||
@@ -177,4 +242,4 @@
cmd == cp_type3_packet(CP_COND_INDIRECT_BUFFER_PFD, 2));
}
-#endif
+#endif /* __ADRENO_PM4TYPES_H */
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 45286dd..cf1cf90 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,8 +19,6 @@
#include "adreno.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
-#include "adreno_postmortem.h"
-#include "adreno_debugfs.h"
#include "kgsl_cffdump.h"
#include "kgsl_pwrctrl.h"
@@ -136,7 +134,7 @@
int range = 0, offset = 0;
for (range = 0; range < size; range++) {
-
+ /* start and end are in dword offsets */
int start = registers[range * 2];
int end = registers[range * 2 + 1];
@@ -194,7 +192,7 @@
dump_ib(device, "IB1:", pt_base, base_offset, ib1_base,
ib1_size, dump);
-
+ /* fetch virtual address for given IB base */
ib1_addr = (uint32_t *)adreno_convertaddr(device, pt_base,
ib1_base, ib1_size*sizeof(uint32_t));
if (!ib1_addr)
@@ -206,7 +204,7 @@
uint32_t ib2_base = ib1_addr[i++];
uint32_t ib2_size = ib1_addr[i++];
-
+ /* find previous match */
for (j = 0; j < ib_list->count; ++j)
if (ib_list->sizes[j] == ib2_size
&& ib_list->bases[j] == ib2_base)
@@ -216,7 +214,7 @@
>= IB_LIST_SIZE)
continue;
-
+ /* store match */
ib_list->sizes[ib_list->count] = ib2_size;
ib_list->bases[ib_list->count] = ib2_base;
ib_list->offsets[ib_list->count] = i<<2;
@@ -270,7 +268,7 @@
#endif
}
-static void adreno_dump_rb(struct kgsl_device *device, const void *buf,
+void adreno_dump_rb(struct kgsl_device *device, const void *buf,
size_t len, int start, int size)
{
const uint32_t *ptr = buf;
@@ -678,7 +676,7 @@
"MH_INTERRUPT: MASK = %08X | STATUS = %08X\n", r1, r2);
}
-static int adreno_dump(struct kgsl_device *device)
+int adreno_dump(struct kgsl_device *device, int manual)
{
unsigned int cp_ib1_base, cp_ib1_bufsz;
unsigned int cp_ib2_base, cp_ib2_bufsz;
@@ -694,21 +692,24 @@
unsigned int ts_processed = 0xdeaddead;
struct kgsl_context *context;
unsigned int context_id;
+ unsigned int rbbm_status;
static struct ib_list ib_list;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- struct kgsl_memdesc **reg_map;
- void *reg_map_array;
int num_iommu_units = 0;
mb();
- if (adreno_is_a2xx(adreno_dev))
- adreno_dump_a2xx(device);
- else if (adreno_is_a3xx(adreno_dev))
- adreno_dump_a3xx(device);
+ if (device->pm_dump_enable) {
+ if (adreno_is_a2xx(adreno_dev))
+ adreno_dump_a2xx(device);
+ else if (adreno_is_a3xx(adreno_dev))
+ adreno_dump_a3xx(device);
+ }
+
+ kgsl_regread(device, adreno_dev->gpudev->reg_rbbm_status, &rbbm_status);
pt_base = kgsl_mmu_get_current_ptbase(&device->mmu);
cur_pt_base = pt_base;
@@ -723,6 +724,18 @@
kgsl_regread(device, REG_CP_IB2_BASE, &cp_ib2_base);
kgsl_regread(device, REG_CP_IB2_BUFSZ, &cp_ib2_bufsz);
+ /* If postmortem dump is not enabled, dump minimal set and return */
+ if (!device->pm_dump_enable) {
+
+ KGSL_LOG_DUMP(device,
+ "RBBM STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
+ " | RPTR: %04X | WPTR: %04X\n",
+ rbbm_status, cp_ib1_base, cp_ib1_bufsz, cp_ib2_base,
+ cp_ib2_bufsz, cp_rb_rptr, cp_rb_wptr);
+
+ return 0;
+ }
+
kgsl_sharedmem_readl(&device->memstore,
(unsigned int *) &context_id,
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
@@ -731,7 +744,7 @@
if (context) {
ts_processed = kgsl_readtimestamp(device, context,
KGSL_TIMESTAMP_RETIRED);
- KGSL_LOG_DUMP(device, "CTXT: %d TIMESTM RTRD: %08X\n",
+ KGSL_LOG_DUMP(device, "FT CTXT: %d TIMESTM RTRD: %08X\n",
context->id, ts_processed);
} else
KGSL_LOG_DUMP(device, "BAD CTXT: %d\n", context_id);
@@ -781,13 +794,11 @@
memcpy(rb_copy+part1_c, rb_vaddr, (num_item-part1_c)<<2);
}
-
+ /* extract the latest ib commands from the buffer */
ib_list.count = 0;
i = 0;
-
- num_iommu_units = kgsl_mmu_get_reg_map_desc(&device->mmu,
- ®_map_array);
- reg_map = reg_map_array;
+ /* get the register mapped array in case we are using IOMMU */
+ num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
for (read_idx = 0; read_idx < num_item; ) {
uint32_t this_cmd = rb_copy[read_idx++];
if (adreno_cmd_is_ib(this_cmd)) {
@@ -801,27 +812,29 @@
ib_list.bases[i],
ib_list.sizes[i], 0);
} else if (this_cmd == cp_type0_packet(MH_MMU_PT_BASE, 1) ||
- (num_iommu_units && this_cmd == (reg_map[0]->gpuaddr +
- (KGSL_IOMMU_CONTEXT_USER << KGSL_IOMMU_CTX_SHIFT) +
- KGSL_IOMMU_TTBR0))) {
-
+ (num_iommu_units && this_cmd ==
+ kgsl_mmu_get_reg_gpuaddr(&device->mmu, 0,
+ KGSL_IOMMU_CONTEXT_USER,
+ KGSL_IOMMU_CTX_TTBR0))) {
KGSL_LOG_DUMP(device, "Current pagetable: %x\t"
"pagetable base: %x\n",
- kgsl_mmu_get_ptname_from_ptbase(cur_pt_base),
+ kgsl_mmu_get_ptname_from_ptbase(&device->mmu,
+ cur_pt_base),
cur_pt_base);
-
+ /* Set cur_pt_base to the new pagetable base */
cur_pt_base = rb_copy[read_idx++];
KGSL_LOG_DUMP(device, "New pagetable: %x\t"
"pagetable base: %x\n",
- kgsl_mmu_get_ptname_from_ptbase(cur_pt_base),
+ kgsl_mmu_get_ptname_from_ptbase(&device->mmu,
+ cur_pt_base),
cur_pt_base);
}
}
- if (num_iommu_units)
- kfree(reg_map_array);
+ /* Restore cur_pt_base back to the pt_base of
+	   the process in whose context the GPU hung */
cur_pt_base = pt_base;
read_idx = (int)cp_rb_rptr - NUM_DWORDS_OF_RINGBUFFER_HISTORY;
@@ -832,7 +845,7 @@
cp_rb_base, cp_rb_rptr, cp_rb_wptr, read_idx);
adreno_dump_rb(device, rb_copy, num_item<<2, read_idx, rb_count);
- if (is_adreno_pm_ib_enabled()) {
+ if (device->pm_ib_enabled) {
for (read_idx = NUM_DWORDS_OF_RINGBUFFER_HISTORY;
read_idx >= 0; --read_idx) {
uint32_t this_cmd = rb_copy[read_idx];
@@ -862,8 +875,8 @@
}
}
-
- if (is_adreno_pm_regs_enabled()) {
+ /* Dump the registers if the user asked for it */
+ if (device->pm_regs_enabled) {
if (adreno_is_a20x(adreno_dev))
adreno_dump_regs(device, a200_registers,
a200_registers_count);
@@ -873,9 +886,14 @@
else if (adreno_is_a225(adreno_dev))
adreno_dump_regs(device, a225_registers,
a225_registers_count);
- else if (adreno_is_a3xx(adreno_dev))
+ else if (adreno_is_a3xx(adreno_dev)) {
adreno_dump_regs(device, a3xx_registers,
a3xx_registers_count);
+
+ if (adreno_is_a330(adreno_dev))
+ adreno_dump_regs(device, a330_registers,
+ a330_registers_count);
+ }
}
error_vfree:
@@ -883,72 +901,3 @@
end:
return result;
}
-
-
-int adreno_postmortem_dump(struct kgsl_device *device, int manual)
-{
- bool saved_nap;
- struct kgsl_pwrctrl *pwr = &device->pwrctrl;
-
- BUG_ON(device == NULL);
-
- kgsl_cffdump_hang(device->id);
-
-
-
- if (manual) {
- if (device->active_cnt != 0) {
- mutex_unlock(&device->mutex);
- wait_for_completion(&device->suspend_gate);
- mutex_lock(&device->mutex);
- }
-
- if (device->state == KGSL_STATE_ACTIVE)
- kgsl_idle(device);
-
- }
- KGSL_LOG_DUMP(device, "POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
- pwr->power_flags, pwr->active_pwrlevel);
-
- KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
- pwr->interval_timeout);
-
- KGSL_LOG_DUMP(device, "GRP_CLK = %lu ",
- kgsl_get_clkrate(pwr->grp_clks[0]));
-
- KGSL_LOG_DUMP(device, "BUS CLK = %lu ",
- kgsl_get_clkrate(pwr->ebi1_clk));
-
-
- del_timer_sync(&device->idle_timer);
- mutex_unlock(&device->mutex);
- flush_workqueue(device->work_queue);
- mutex_lock(&device->mutex);
-
- saved_nap = device->pwrctrl.nap_allowed;
- device->pwrctrl.nap_allowed = false;
-
-
- kgsl_pwrctrl_wake(device);
-
-
- kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
-
- adreno_dump(device);
-
-
- device->pwrctrl.nap_allowed = saved_nap;
-
-
- if (manual) {
- kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
-
-
- kgsl_pwrctrl_request_state(device, KGSL_STATE_SLEEP);
- kgsl_pwrctrl_sleep(device);
- }
-
- KGSL_DRV_ERR(device, "Dump Finished\n");
-
- return 0;
-}
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 90ff642..179027c 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,34 +18,44 @@
#include "kgsl.h"
#include "kgsl_sharedmem.h"
#include "kgsl_cffdump.h"
-#include "kgsl_trace.h"
#include "adreno.h"
#include "adreno_pm4types.h"
#include "adreno_ringbuffer.h"
-#include "adreno_debugfs.h"
#include "a2xx_reg.h"
#include "a3xx_reg.h"
#define GSL_RB_NOP_SIZEDWORDS 2
-#define CP_DEBUG_DEFAULT 0xA000000
+/*
+ * CP DEBUG settings for all cores:
+ * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
+ * PROG_END_PTR_ENABLE [25] - Allow 128 bit writes to the VBIF
+ */
+
+#define CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
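+/*
+ * Note: (1 << 27) | (1 << 25) evaluates to 0x0A000000, matching the
+ * 0xA000000 value the removed define hard-coded for A3XX; the bit names
+ * above just make the intent explicit.
+ */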
void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb)
{
BUG_ON(rb->wptr == 0);
+ /* Let the pwrscale policy know that new commands have
+ been submitted. */
kgsl_pwrscale_busy(rb->device);
+ /*synchronize memory before informing the hardware of the
+ *new commands.
+ */
mb();
adreno_regwrite(rb->device, REG_CP_RB_WPTR, rb->wptr);
}
-static void
-adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb, unsigned int numcmds,
- int wptr_ahead)
+static int
+adreno_ringbuffer_waitspace(struct adreno_ringbuffer *rb,
+ struct adreno_context *context,
+ unsigned int numcmds, int wptr_ahead)
{
int nopcount;
unsigned int freecmds;
@@ -54,13 +64,13 @@
unsigned long wait_time;
unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
unsigned long wait_time_part;
- unsigned int prev_reg_val[hang_detect_regs_count];
+ unsigned int prev_reg_val[ft_detect_regs_count];
memset(prev_reg_val, 0, sizeof(prev_reg_val));
-
+ /* if wptr ahead, fill the remaining with NOPs */
if (wptr_ahead) {
-
+ /* -1 for header */
nopcount = rb->sizedwords - rb->wptr - 1;
cmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
@@ -68,6 +78,10 @@
GSL_RB_WRITE(cmds, cmds_gpu, cp_nop_packet(nopcount));
+ /* Make sure that rptr is not 0 before submitting
+ * commands at the end of ringbuffer. We do not
+ * want the rptr and wptr to become equal when
+ * the ringbuffer is not empty */
do {
GSL_RB_GET_READPTR(rb, &rb->rptr);
} while (!rb->rptr);
@@ -81,7 +95,7 @@
wait_time = jiffies + wait_timeout;
wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
-
+ /* wait for space in ringbuffer */
while (1) {
GSL_RB_GET_READPTR(rb, &rb->rptr);
@@ -90,10 +104,12 @@
if (freecmds == 0 || freecmds > numcmds)
break;
+		/* Don't wait for the timeout; detect the hang faster.
+ */
if (time_after(jiffies, wait_time_part)) {
wait_time_part = jiffies +
msecs_to_jiffies(KGSL_TIMEOUT_PART);
- if ((adreno_hang_detect(rb->device,
+ if ((adreno_ft_detect(rb->device,
prev_reg_val))){
KGSL_DRV_ERR(rb->device,
"Hang detected while waiting for freespace in"
@@ -113,43 +129,56 @@
continue;
err:
- if (!adreno_dump_and_recover(rb->device)) {
+ if (!adreno_dump_and_exec_ft(rb->device)) {
+ if (context && context->flags & CTXT_FLAGS_GPU_HANG) {
+ KGSL_CTXT_WARN(rb->device,
+ "Context %p caused a gpu hang. Will not accept commands for context %d\n",
+ context, context->id);
+ return -EDEADLK;
+ }
wait_time = jiffies + wait_timeout;
} else {
-
+ /* GPU is hung and fault tolerance failed */
BUG();
}
}
+ return 0;
}
unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
- unsigned int numcmds)
+ struct adreno_context *context,
+ unsigned int numcmds)
{
- unsigned int *ptr = NULL;
-
+ unsigned int *ptr = NULL;
+ int ret = 0;
BUG_ON(numcmds >= rb->sizedwords);
GSL_RB_GET_READPTR(rb, &rb->rptr);
-
+ /* check for available space */
if (rb->wptr >= rb->rptr) {
-
-
+ /* wptr ahead or equal to rptr */
+ /* reserve dwords for nop packet */
if ((rb->wptr + numcmds) > (rb->sizedwords -
GSL_RB_NOP_SIZEDWORDS))
- adreno_ringbuffer_waitspace(rb, numcmds, 1);
+ ret = adreno_ringbuffer_waitspace(rb, context,
+ numcmds, 1);
} else {
-
+ /* wptr behind rptr */
if ((rb->wptr + numcmds) >= rb->rptr)
- adreno_ringbuffer_waitspace(rb, numcmds, 0);
-
-
- if ((rb->wptr + numcmds) > (rb->sizedwords -
+ ret = adreno_ringbuffer_waitspace(rb, context,
+ numcmds, 0);
+ /* check for remaining space */
+ /* reserve dwords for nop packet */
+ if (!ret && (rb->wptr + numcmds) > (rb->sizedwords -
GSL_RB_NOP_SIZEDWORDS))
- adreno_ringbuffer_waitspace(rb, numcmds, 1);
+ ret = adreno_ringbuffer_waitspace(rb, context,
+ numcmds, 1);
}
- ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
- rb->wptr += numcmds;
+ if (!ret) {
+ ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
+ rb->wptr += numcmds;
+ }
return ptr;
}
@@ -195,7 +224,7 @@
if (ret)
goto err;
-
+ /* PM4 size is 3 dword aligned plus 1 dword of version */
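+	/* i.e. a valid image holds 3k + 1 dwords (len % 12 bytes == 4) */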
if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) {
KGSL_DRV_ERR(device, "Bad firmware size: %d\n", len);
ret = -EINVAL;
@@ -226,10 +255,8 @@
KGSL_DRV_INFO(device, "loading pm4 ucode version: %d\n",
adreno_dev->pm4_fw_version);
- if (adreno_is_a3xx(adreno_dev))
- adreno_regwrite(device, REG_CP_DEBUG, CP_DEBUG_DEFAULT);
- else
- adreno_regwrite(device, REG_CP_DEBUG, 0x02000000);
+
+ adreno_regwrite(device, REG_CP_DEBUG, CP_DEBUG_DEFAULT);
adreno_regwrite(device, REG_CP_ME_RAM_WADDR, 0);
for (i = 1; i < adreno_dev->pm4_fw_size; i++)
adreno_regwrite(device, REG_CP_ME_RAM_DATA,
@@ -252,7 +279,7 @@
if (ret)
goto err;
-
+	/* PFP size should be dword aligned */
if (len % sizeof(uint32_t) != 0) {
KGSL_DRV_ERR(device, "Bad firmware size: %d\n", len);
ret = -EINVAL;
@@ -281,20 +308,21 @@
}
KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n",
- adreno_dev->pfp_fw_version);
+ adreno_dev->pfp_fw_version);
adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0);
for (i = 1; i < adreno_dev->pfp_fw_size; i++)
adreno_regwrite(device,
- adreno_dev->gpudev->reg_cp_pfp_ucode_data,
- adreno_dev->pfp_fw[i]);
+ adreno_dev->gpudev->reg_cp_pfp_ucode_data,
+ adreno_dev->pfp_fw[i]);
+
return 0;
}
int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram)
{
int status;
-
+ /*cp_rb_cntl_u cp_rb_cntl; */
union reg_cp_rb_cntl cp_rb_cntl;
unsigned int rb_cntl;
struct kgsl_device *device = rb->device;
@@ -317,25 +345,34 @@
(rb->memptrs_desc.gpuaddr
+ GSL_RB_MEMPTRS_WPTRPOLL_OFFSET));
-
+ /* setup WPTR delay */
adreno_regwrite(device, REG_CP_RB_WPTR_DELAY,
- 0 );
+ 0 /*0x70000010 */);
}
-
+ /*setup REG_CP_RB_CNTL */
adreno_regread(device, REG_CP_RB_CNTL, &rb_cntl);
cp_rb_cntl.val = rb_cntl;
+ /*
+ * The size of the ringbuffer in the hardware is the log2
+ * representation of the size in quadwords (sizedwords / 2)
+ */
cp_rb_cntl.f.rb_bufsz = ilog2(rb->sizedwords >> 1);
+ /*
+ * Specify the quadwords to read before updating mem RPTR.
+ * Like above, pass the log2 representation of the blocksize
+ * in quadwords.
+ */
cp_rb_cntl.f.rb_blksz = ilog2(KGSL_RB_BLKSIZE >> 3);
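+	/*
+	 * For example, a 32 KB ring (sizedwords = 8192) would program
+	 * rb_bufsz = ilog2(4096) = 12; rb_blksz follows the same log2
+	 * convention for the read-pointer update block size.
+	 */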
if (adreno_is_a2xx(adreno_dev)) {
-
+ /* WPTR polling */
cp_rb_cntl.f.rb_poll_en = GSL_RB_CNTL_POLL_EN;
}
-
+ /* mem RPTR writebacks */
cp_rb_cntl.f.rb_no_update = GSL_RB_CNTL_NO_UPDATE;
adreno_regwrite(device, REG_CP_RB_CNTL, cp_rb_cntl.val);
@@ -347,10 +384,10 @@
GSL_RB_MEMPTRS_RPTR_OFFSET);
if (adreno_is_a3xx(adreno_dev)) {
-
+ /* enable access protection to privileged registers */
adreno_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007);
-
+ /* RBBM registers */
adreno_regwrite(device, A3XX_CP_PROTECT_REG_0, 0x63000040);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_1, 0x62000080);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_2, 0x600000CC);
@@ -358,26 +395,26 @@
adreno_regwrite(device, A3XX_CP_PROTECT_REG_4, 0x64000140);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_5, 0x66000400);
-
+ /* CP registers */
adreno_regwrite(device, A3XX_CP_PROTECT_REG_6, 0x65000700);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_7, 0x610007D8);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_8, 0x620007E0);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_9, 0x61001178);
adreno_regwrite(device, A3XX_CP_PROTECT_REG_A, 0x64001180);
-
+ /* RB registers */
adreno_regwrite(device, A3XX_CP_PROTECT_REG_B, 0x60003300);
-
+ /* VBIF registers */
adreno_regwrite(device, A3XX_CP_PROTECT_REG_C, 0x6B00C000);
}
if (adreno_is_a2xx(adreno_dev)) {
-
+ /* explicitly clear all cp interrupts */
adreno_regwrite(device, REG_CP_INT_ACK, 0xFFFFFFFF);
}
-
+ /* setup scratch/timestamp */
adreno_regwrite(device, REG_SCRATCH_ADDR, device->memstore.gpuaddr +
KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
soptimestamp));
@@ -385,31 +422,30 @@
adreno_regwrite(device, REG_SCRATCH_UMSK,
GSL_RB_MEMPTRS_SCRATCH_MASK);
-
-
+ /* load the CP ucode */
status = adreno_ringbuffer_load_pm4_ucode(device);
if (status != 0)
return status;
-
+ /* load the prefetch parser ucode */
status = adreno_ringbuffer_load_pfp_ucode(device);
if (status != 0)
return status;
-
+ /* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
if (adreno_is_a305(adreno_dev) || adreno_is_a320(adreno_dev))
adreno_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x000E0602);
rb->rptr = 0;
rb->wptr = 0;
-
+ /* clear ME_HALT to start micro engine */
adreno_regwrite(device, REG_CP_ME_CNTL, 0);
-
+ /* ME init is GPU specific, so jump into the sub-function */
adreno_dev->gpudev->rb_init(adreno_dev, rb);
-
+ /* idle device to validate ME INIT */
status = adreno_idle(device);
if (status == 0)
@@ -420,9 +456,13 @@
void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb)
{
+ struct kgsl_device *device = rb->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
if (rb->flags & KGSL_FLAGS_STARTED) {
-
- adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000);
+ if (adreno_is_a200(adreno_dev))
+ adreno_regwrite(rb->device, REG_CP_ME_CNTL, 0x10000000);
+
rb->flags &= ~KGSL_FLAGS_STARTED;
}
}
@@ -434,9 +474,14 @@
struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
rb->device = device;
+ /*
+ * It is silly to convert this to words and then back to bytes
+ * immediately below, but most of the rest of the code deals
+ * in words, so we might as well only do the math once
+ */
rb->sizedwords = KGSL_RB_SIZE >> 2;
-
+ /* allocate memory for ringbuffer */
status = kgsl_allocate_contiguous(&rb->buffer_desc,
(rb->sizedwords << 2));
@@ -445,7 +490,9 @@
return status;
}
-
+ /* allocate memory for polling and timestamps */
+	/* This really can be at 4 byte alignment boundary but for using MMU
+ * we need to make it at page boundary */
status = kgsl_allocate_contiguous(&rb->memptrs_desc,
sizeof(struct kgsl_rbmemptrs));
@@ -454,7 +501,7 @@
return status;
}
-
+ /* overlay structure on memptrs memory */
rb->memptrs = (struct kgsl_rbmemptrs *) rb->memptrs_desc.hostptr;
return 0;
@@ -480,42 +527,67 @@
adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
struct adreno_context *context,
unsigned int flags, unsigned int *cmds,
- int sizedwords)
+ int sizedwords, uint32_t timestamp)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
unsigned int *ringcmds;
- unsigned int timestamp;
unsigned int total_sizedwords = sizedwords;
unsigned int i;
unsigned int rcmd_gpu;
unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
unsigned int gpuaddr = rb->device->memstore.gpuaddr;
- if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS))
+ /*
+ * if the context was not created with per context timestamp
+ * support, we must use the global timestamp since issueibcmds
+ * will be returning that one.
+ */
+ if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS)
context_id = context->id;
+ if ((context && context->flags & CTXT_FLAGS_USER_GENERATED_TS) &&
+ (!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))) {
+ if (timestamp_cmp(rb->timestamp[context_id],
+ timestamp) >= 0) {
+ KGSL_DRV_ERR(rb->device,
+ "Invalid user generated ts <%d:0x%x>, "
+ "less than last issued ts <%d:0x%x>\n",
+ context_id, timestamp, context_id,
+ rb->timestamp[context_id]);
+ return -ERANGE;
+ }
+ }
+
+ /* reserve space to temporarily turn off protected mode
+ * error checking if needed
+ */
total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
- total_sizedwords += !(flags & KGSL_CMD_FLAGS_NO_TS_CMP) ? 7 : 0;
-
+ /* 2 dwords to store the start of command sequence */
total_sizedwords += 2;
+ /* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
+ total_sizedwords += context ? 13 : 0;
+
if (adreno_is_a3xx(adreno_dev))
total_sizedwords += 7;
- total_sizedwords += 2;
- if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
- total_sizedwords += 3;
- total_sizedwords += 4;
- total_sizedwords += 3;
+ total_sizedwords += 2; /* scratchpad ts for fault tolerance */
+ if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS &&
+ !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+ total_sizedwords += 3; /* sop timestamp */
+ total_sizedwords += 4; /* eop timestamp */
+ total_sizedwords += 3; /* global timestamp without cache
+ * flush for non-zero context */
} else {
- total_sizedwords += 4;
+ total_sizedwords += 4; /* global timestamp for fault tolerance*/
}
- ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
- if (context && (context->flags & CTXT_FLAGS_GPU_HANG)) {
- KGSL_CTXT_WARN(rb->device,
- "Context %p caused a gpu hang. Will not accept commands for context %d\n",
- context, context->id);
+ ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
+ if (!ringcmds) {
+ /*
+ * We could not allocate space in ringbuffer, just return the
+ * last timestamp
+ */
return rb->timestamp[context_id];
}
@@ -526,7 +598,7 @@
GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_CMD_IDENTIFIER);
if (flags & KGSL_CMD_FLAGS_PMODE) {
-
+ /* disable protected mode error checking */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
@@ -538,58 +610,67 @@
}
if (flags & KGSL_CMD_FLAGS_PMODE) {
-
+ /* re-enable protected mode error checking */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_SET_PROTECTED_MODE, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, 1);
}
-
+ /* always increment the global timestamp. once. */
rb->timestamp[KGSL_MEMSTORE_GLOBAL]++;
- if (context) {
+
+ /* Do not update context's timestamp for internal submissions */
+ if (context && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
if (context_id == KGSL_MEMSTORE_GLOBAL)
- rb->timestamp[context_id] =
+ rb->timestamp[context->id] =
rb->timestamp[KGSL_MEMSTORE_GLOBAL];
+ else if (context->flags & CTXT_FLAGS_USER_GENERATED_TS)
+ rb->timestamp[context_id] = timestamp;
else
rb->timestamp[context_id]++;
}
timestamp = rb->timestamp[context_id];
-
+ /* scratchpad ts for fault tolerance */
GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_type0_packet(REG_CP_TIMESTAMP, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
if (adreno_is_a3xx(adreno_dev)) {
+ /*
+		 * Flush HLSQ lazy updates to make sure there are no
+		 * resources pending for indirect loads after the timestamp
+ */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_EVENT_WRITE, 1));
- GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07);
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x07); /* HLSQ_FLUSH */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x00);
}
- if (context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
-
+ if (context && context->flags & CTXT_FLAGS_PER_CONTEXT_TS
+ && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+ /* start-of-pipeline timestamp */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_MEM_WRITE, 2));
GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(context->id, soptimestamp)));
+ KGSL_MEMSTORE_OFFSET(context_id, soptimestamp)));
GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
+ /* end-of-pipeline timestamp */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_EVENT_WRITE, 3));
GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp)));
+ KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_MEM_WRITE, 2));
GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- eoptimestamp)));
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp)));
GSL_RB_WRITE(ringcmds, rcmd_gpu,
rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
} else {
@@ -597,14 +678,13 @@
cp_type3_packet(CP_EVENT_WRITE, 3));
GSL_RB_WRITE(ringcmds, rcmd_gpu, CACHE_FLUSH_TS);
GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- eoptimestamp)));
+ KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+ eoptimestamp)));
GSL_RB_WRITE(ringcmds, rcmd_gpu,
- rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
+ rb->timestamp[KGSL_MEMSTORE_GLOBAL]);
}
-
- if (!(flags & KGSL_CMD_FLAGS_NO_TS_CMP)) {
-
+ if (context) {
+ /* Conditional execution based on memory values */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_COND_EXEC, 4));
GSL_RB_WRITE(ringcmds, rcmd_gpu, (gpuaddr +
@@ -614,15 +694,33 @@
KGSL_MEMSTORE_OFFSET(
context_id, ref_wait_ts)) >> 2);
GSL_RB_WRITE(ringcmds, rcmd_gpu, timestamp);
-
- GSL_RB_WRITE(ringcmds, rcmd_gpu, 2);
+ /* # of conditional command DWORDs */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 8);
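+		/*
+		 * The count of 8 matches the packets below: two 3-dword
+		 * CP_MEM_WRITEs (header + address + value each) plus the
+		 * 2-dword CP_INTERRUPT.
+		 */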
+
+ /* Clear the ts_cmp_enable for the context */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_MEM_WRITE, 2));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, gpuaddr +
+ KGSL_MEMSTORE_OFFSET(
+ context_id, ts_cmp_enable));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x0);
+
+ /* Clear the ts_cmp_enable for the global timestamp */
+ GSL_RB_WRITE(ringcmds, rcmd_gpu,
+ cp_type3_packet(CP_MEM_WRITE, 2));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, gpuaddr +
+ KGSL_MEMSTORE_OFFSET(
+ KGSL_MEMSTORE_GLOBAL, ts_cmp_enable));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, 0x0);
+
+ /* Trigger the interrupt */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_INTERRUPT, 1));
GSL_RB_WRITE(ringcmds, rcmd_gpu, CP_INT_CNTL__RB_INT_MASK);
}
if (adreno_is_a3xx(adreno_dev)) {
-
+ /* Dummy set-constant to trigger context rollover */
GSL_RB_WRITE(ringcmds, rcmd_gpu,
cp_type3_packet(CP_SET_CONSTANT, 2));
GSL_RB_WRITE(ringcmds, rcmd_gpu,
@@ -630,6 +728,11 @@
GSL_RB_WRITE(ringcmds, rcmd_gpu, 0);
}
+ if (flags & KGSL_CMD_FLAGS_EOF) {
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, cp_nop_packet(1));
+ GSL_RB_WRITE(ringcmds, rcmd_gpu, KGSL_END_OF_FRAME_IDENTIFIER);
+ }
+
adreno_ringbuffer_submit(rb);
return timestamp;
@@ -648,7 +751,11 @@
if (device->state & KGSL_STATE_HUNG)
return kgsl_readtimestamp(device, KGSL_MEMSTORE_GLOBAL,
KGSL_TIMESTAMP_RETIRED);
- return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds, sizedwords);
+
+ flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE;
+
+ return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds,
+ sizedwords, 0);
}
static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
@@ -702,7 +809,7 @@
case CP_IM_STORE:
case CP_LOAD_STATE:
break;
-
+ /* these shouldn't come from userspace */
case CP_ME_INIT:
case CP_SET_PROTECTED_MODE:
default:
@@ -727,13 +834,19 @@
return true;
}
+/*
+ * Traverse IBs and dump them to test vector. Detect swap by inspecting
+ * register writes, keeping note of the current state, and dump
+ * framebuffer config to test vector
+ */
static bool _parse_ibs(struct kgsl_device_private *dev_priv,
uint gpuaddr, int sizedwords)
{
- static uint level;
+ static uint level; /* recursion level */
bool ret = false;
uint *hostaddr, *hoststart;
- int dwords_left = sizedwords;
+ int dwords_left = sizedwords; /* dwords left in the current command
+ buffer */
struct kgsl_mem_entry *entry;
spin_lock(&dev_priv->process_priv->mem_lock);
@@ -763,17 +876,17 @@
mb();
while (dwords_left > 0) {
bool cur_ret = true;
- int count = 0;
+ int count = 0; /* dword count including packet header */
switch (*hostaddr >> 30) {
- case 0x0:
+ case 0x0: /* type-0 */
count = (*hostaddr >> 16)+2;
cur_ret = _handle_type0(dev_priv, hostaddr);
break;
- case 0x1:
+ case 0x1: /* type-1 */
count = 2;
break;
- case 0x3:
+ case 0x3: /* type-3 */
count = ((*hostaddr >> 16) & 0x3fff) + 2;
cur_ret = _handle_type3(dev_priv, hostaddr);
break;
@@ -804,7 +917,7 @@
goto done;
}
-
+ /* jump to next packet */
dwords_left -= count;
hostaddr += count;
if (dwords_left < 0) {
@@ -852,9 +965,6 @@
unsigned int i;
struct adreno_context *drawctxt;
unsigned int start_index = 0;
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE
- struct kgsl_pwrctrl *pwr = &device->pwrctrl;
-#endif
if (device->state & KGSL_STATE_HUNG)
return -EBUSY;
@@ -865,12 +975,22 @@
drawctxt = context->devctxt;
if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
- KGSL_CTXT_WARN(device, "Context %p caused a gpu hang.."
+ KGSL_CTXT_ERR(device, "proc %s failed fault tolerance"
" will not accept commands for context %d\n",
- drawctxt, drawctxt->id);
+ drawctxt->pid_name, drawctxt->id);
return -EDEADLK;
}
+ if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+ KGSL_CTXT_ERR(device,
+ "proc %s triggered fault tolerance"
+ " skipping commands for context till EOF %d\n",
+ drawctxt->pid_name, drawctxt->id);
+ if (flags & KGSL_CMD_FLAGS_EOF)
+ drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+ numibs = 0;
+ }
+
cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
GFP_KERNEL);
if (!link) {
@@ -879,6 +999,9 @@
return -ENOMEM;
}
+	/* When preamble is enabled, the preamble buffer with state restoration
+	commands is stored in the first node of the IB chain. We can skip that
+	if a context switch hasn't occurred */
if (drawctxt->flags & CTXT_FLAGS_PREAMBLE &&
adreno_dev->drawctxt_active == drawctxt)
@@ -913,24 +1036,12 @@
kgsl_mmu_pt_get_flags(device->mmu.hwpagetable,
device->id));
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE
- if(device->id == 0 && device->prev_pid != -1 && device->prev_pid != task_tgid_nr(current)) {
- trace_kgsl_usage(device, KGSL_PWRFLAGS_ON, dev_priv->process_priv->pid, device->gputime.total, device->gputime.busy,
- pwr->active_pwrlevel, pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq);
- device->prev_pid = task_tgid_nr(current);
- }
-#endif
-
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- if(device->current_process_priv == NULL || device->current_process_priv->pid != dev_priv->process_priv->pid)
- device->current_process_priv = dev_priv->process_priv;
-#endif
-
adreno_drawctxt_switch(adreno_dev, drawctxt, flags);
*timestamp = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
- drawctxt, 0,
- &link[0], (cmds - link));
+ drawctxt,
+ (flags & KGSL_CMD_FLAGS_EOF),
+ &link[0], (cmds - link), *timestamp);
KGSL_CMD_INFO(device, "ctxt %d g %08x numibs %d ts %d\n",
context->id, (unsigned int)ibdesc, numibs, *timestamp);
@@ -938,156 +1049,23 @@
kfree(link);
#ifdef CONFIG_MSM_KGSL_CFF_DUMP
+ /*
+ * insert wait for idle after every IB1
+ * this is conservative but works reliably and is ok
+ * even for performance simulations
+ */
adreno_idle(device);
#endif
- if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_RECOVERED)
- return -EDEADLK;
- else
+
+ /*
+ * If context hung and recovered then return error so that the
+ * application may handle it
+ */
+ if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) {
+ drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT;
+ return -EPROTO;
+ } else
return 0;
-
-}
-
-static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
- unsigned int *ptr,
- bool inc)
-{
- int status = -EINVAL;
- unsigned int val1;
- unsigned int size = rb->buffer_desc.size;
- unsigned int start_ptr = *ptr;
-
- while ((start_ptr / sizeof(unsigned int)) != rb->wptr) {
- if (inc)
- start_ptr = adreno_ringbuffer_inc_wrapped(start_ptr,
- size);
- else
- start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr,
- size);
- kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr);
- if (KGSL_CMD_IDENTIFIER == val1) {
- if ((start_ptr / sizeof(unsigned int)) != rb->wptr)
- start_ptr = adreno_ringbuffer_dec_wrapped(
- start_ptr, size);
- *ptr = start_ptr;
- status = 0;
- break;
- }
- }
- return status;
-}
-
-static int _find_cmd_seq_after_eop_ts(struct adreno_ringbuffer *rb,
- unsigned int *rb_rptr,
- unsigned int global_eop,
- bool inc)
-{
- int status = -EINVAL;
- unsigned int temp_rb_rptr = *rb_rptr;
- unsigned int size = rb->buffer_desc.size;
- unsigned int val[3];
- int i = 0;
- bool check = false;
-
- if (inc && temp_rb_rptr / sizeof(unsigned int) != rb->wptr)
- return status;
-
- do {
- if (!inc)
- temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
- temp_rb_rptr, size);
- kgsl_sharedmem_readl(&rb->buffer_desc, &val[i],
- temp_rb_rptr);
-
- if (check && ((inc && val[i] == global_eop) ||
- (!inc && (val[i] ==
- cp_type3_packet(CP_MEM_WRITE, 2) ||
- val[i] == CACHE_FLUSH_TS)))) {
- i = (i + 2) % 3;
- if (val[i] == rb->device->memstore.gpuaddr +
- KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
- eoptimestamp)) {
- int j = ((i + 2) % 3);
- if ((inc && (val[j] == CACHE_FLUSH_TS ||
- val[j] == cp_type3_packet(
- CP_MEM_WRITE, 2))) ||
- (!inc && val[j] == global_eop)) {
-
- status = 0;
- break;
- }
- }
- i = (i + 1) % 3;
- }
- if (inc)
- temp_rb_rptr = adreno_ringbuffer_inc_wrapped(
- temp_rb_rptr, size);
-
- i = (i + 1) % 3;
- if (2 == i)
- check = true;
- } while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr);
- if (!status) {
- status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false);
- if (!status) {
- *rb_rptr = temp_rb_rptr;
- KGSL_DRV_ERR(rb->device,
- "Offset of cmd sequence after eop timestamp: 0x%x\n",
- temp_rb_rptr / sizeof(unsigned int));
- }
- }
- if (status)
- KGSL_DRV_ERR(rb->device,
- "Failed to find the command sequence after eop timestamp\n");
- return status;
-}
-
-static int _find_hanging_ib_sequence(struct adreno_ringbuffer *rb,
- unsigned int *rb_rptr,
- unsigned int ib1)
-{
- int status = -EINVAL;
- unsigned int temp_rb_rptr = *rb_rptr;
- unsigned int size = rb->buffer_desc.size;
- unsigned int val[2];
- int i = 0;
- bool check = false;
- bool ctx_switch = false;
-
- while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
- kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
-
- if (check && val[i] == ib1) {
-
- i = (i + 1) % 2;
- if (adreno_cmd_is_ib(val[i])) {
-
- status = _find_start_of_cmd_seq(rb,
- &temp_rb_rptr, false);
- KGSL_DRV_ERR(rb->device,
- "Found the hanging IB at offset 0x%x\n",
- temp_rb_rptr / sizeof(unsigned int));
- break;
- }
- i = (i + 1) % 2;
- }
- if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
- if (ctx_switch) {
- KGSL_DRV_ERR(rb->device,
- "Context switch encountered before bad "
- "IB found\n");
- break;
- }
- ctx_switch = true;
- }
- i = (i + 1) % 2;
- if (1 == i)
- check = true;
- temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
- size);
- }
- if (!status)
- *rb_rptr = temp_rb_rptr;
- return status;
}
static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
@@ -1100,11 +1078,11 @@
bool check = false;
bool cmd_start = false;
-
+ /* Go till the start of the ib sequence and turn on preamble */
while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
if (check && KGSL_START_OF_IB_IDENTIFIER == val[i]) {
-
+ /* decrement i */
i = (i + 1) % 2;
if (val[i] == cp_nop_packet(4)) {
temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
@@ -1112,11 +1090,14 @@
kgsl_sharedmem_writel(&rb->buffer_desc,
temp_rb_rptr, cp_nop_packet(1));
}
- KGSL_DRV_ERR(rb->device,
+ KGSL_FT_INFO(rb->device,
"Turned preamble on at offset 0x%x\n",
temp_rb_rptr / 4);
break;
}
+	/* If we reach the beginning of the next command sequence then exit.
+	 * The first command encountered is the current one, so don't break
+	 * on that. */
if (KGSL_CMD_IDENTIFIER == val[i]) {
if (cmd_start)
break;
@@ -1131,119 +1112,122 @@
}
}
-static void _copy_valid_rb_content(struct adreno_ringbuffer *rb,
- unsigned int rb_rptr, unsigned int *temp_rb_buffer,
- int *rb_size, unsigned int *bad_rb_buffer,
- int *bad_rb_size,
- int *last_valid_ctx_id)
+void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
+ struct adreno_ft_data *ft_data)
{
- unsigned int good_rb_idx = 0, cmd_start_idx = 0;
+ struct kgsl_device *device = rb->device;
+ unsigned int rb_rptr = ft_data->start_of_replay_cmds;
+ unsigned int good_rb_idx = 0, bad_rb_idx = 0, temp_rb_idx = 0;
+ unsigned int last_good_cmd_end_idx = 0, last_bad_cmd_end_idx = 0;
+ unsigned int cmd_start_idx = 0;
unsigned int val1 = 0;
- struct kgsl_context *k_ctxt;
- struct adreno_context *a_ctxt;
- unsigned int bad_rb_idx = 0;
int copy_rb_contents = 0;
unsigned int temp_rb_rptr;
+ struct kgsl_context *k_ctxt;
+ struct adreno_context *a_ctxt;
unsigned int size = rb->buffer_desc.size;
- unsigned int good_cmd_start_idx = 0;
+ unsigned int *temp_rb_buffer = ft_data->rb_buffer;
+ int *rb_size = &ft_data->rb_size;
+ unsigned int *bad_rb_buffer = ft_data->bad_rb_buffer;
+ int *bad_rb_size = &ft_data->bad_rb_size;
+ unsigned int *good_rb_buffer = ft_data->good_rb_buffer;
+ int *good_rb_size = &ft_data->good_rb_size;
+ /*
+ * If the start index from where commands need to be copied is invalid
+	 * then there is no need to save off any commands
+ */
+ if (0xFFFFFFFF == ft_data->start_of_replay_cmds)
+ return;
+
+ k_ctxt = idr_find(&device->context_idr, ft_data->context_id);
+ if (k_ctxt) {
+ a_ctxt = k_ctxt->devctxt;
+ if (a_ctxt->flags & CTXT_FLAGS_PREAMBLE)
+ _turn_preamble_on_for_ib_seq(rb, rb_rptr);
+ }
+ k_ctxt = NULL;
+
+ /* Walk the rb from the context switch. Omit any commands
+ * for an invalid context. */
while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
if (KGSL_CMD_IDENTIFIER == val1) {
- cmd_start_idx = bad_rb_idx - 1;
- if (copy_rb_contents)
- good_cmd_start_idx = good_rb_idx - 1;
+ /* Start is the NOP dword that comes before
+ * KGSL_CMD_IDENTIFIER */
+ cmd_start_idx = temp_rb_idx - 1;
+ if ((copy_rb_contents) && (good_rb_idx))
+ last_good_cmd_end_idx = good_rb_idx - 1;
+ if ((!copy_rb_contents) && (bad_rb_idx))
+ last_bad_cmd_end_idx = bad_rb_idx - 1;
}
-
+ /* check for context switch indicator */
if (val1 == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
unsigned int temp_idx, val2;
-
+ /* increment by 3 to get to the context_id */
temp_rb_rptr = rb_rptr + (3 * sizeof(unsigned int)) %
size;
kgsl_sharedmem_readl(&rb->buffer_desc, &val2,
temp_rb_rptr);
+ /* if context switches to a context that did not cause
+ * hang then start saving the rb contents as those
+ * commands can be executed */
k_ctxt = idr_find(&rb->device->context_idr, val2);
if (k_ctxt) {
a_ctxt = k_ctxt->devctxt;
+ /* If we are changing to a good context and were not
+ * copying commands then copy over commands to the good
+ * context */
if (!copy_rb_contents && ((k_ctxt &&
!(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) ||
!k_ctxt)) {
for (temp_idx = cmd_start_idx;
- temp_idx < bad_rb_idx;
+ temp_idx < temp_rb_idx;
temp_idx++)
- temp_rb_buffer[good_rb_idx++] =
- bad_rb_buffer[temp_idx];
- *last_valid_ctx_id = val2;
+ good_rb_buffer[good_rb_idx++] =
+ temp_rb_buffer[temp_idx];
+ ft_data->last_valid_ctx_id = val2;
copy_rb_contents = 1;
+ /* remove the good commands from bad buffer */
+ bad_rb_idx = last_bad_cmd_end_idx;
} else if (copy_rb_contents && k_ctxt &&
(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) {
- good_rb_idx = good_cmd_start_idx;
+
+ /* If we are changing back to a bad context
+ * from good ctxt and were not copying commands
+ * to bad ctxt then copy over commands to
+ * the bad context */
+ for (temp_idx = cmd_start_idx;
+ temp_idx < temp_rb_idx;
+ temp_idx++)
+ bad_rb_buffer[bad_rb_idx++] =
+ temp_rb_buffer[temp_idx];
+ /* If we are changing to bad context then
+ * remove the dwords we copied for this
+ * sequence from the good buffer */
+ good_rb_idx = last_good_cmd_end_idx;
copy_rb_contents = 0;
}
}
}
if (copy_rb_contents)
- temp_rb_buffer[good_rb_idx++] = val1;
- bad_rb_buffer[bad_rb_idx++] = val1;
+ good_rb_buffer[good_rb_idx++] = val1;
+ else
+ bad_rb_buffer[bad_rb_idx++] = val1;
+
+ /* Copy both good and bad commands to temp buffer */
+ temp_rb_buffer[temp_rb_idx++] = val1;
rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, size);
}
- *rb_size = good_rb_idx;
+ *good_rb_size = good_rb_idx;
*bad_rb_size = bad_rb_idx;
-}
-
-int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
- struct adreno_recovery_data *rec_data)
-{
- int status;
- struct kgsl_device *device = rb->device;
- unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
- struct kgsl_context *context;
- struct adreno_context *adreno_context;
-
- context = idr_find(&device->context_idr, rec_data->context_id);
-
-
- status = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
- rec_data->global_eop + 1, false);
- if (status)
- goto done;
-
- if (context) {
- adreno_context = context->devctxt;
-
- if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
- if (rec_data->ib1) {
- status = _find_hanging_ib_sequence(rb, &rb_rptr,
- rec_data->ib1);
- if (status)
- goto copy_rb_contents;
- }
- _turn_preamble_on_for_ib_seq(rb, rb_rptr);
- } else {
- status = -EINVAL;
- }
- }
-
-copy_rb_contents:
- _copy_valid_rb_content(rb, rb_rptr, rec_data->rb_buffer,
- &rec_data->rb_size,
- rec_data->bad_rb_buffer,
- &rec_data->bad_rb_size,
- &rec_data->last_valid_ctx_id);
- if (status) {
- rec_data->bad_rb_size = 0;
- status = 0;
- }
- if (!context)
- rec_data->rb_size = 0;
-done:
- return status;
+ *rb_size = temp_rb_idx;
}
void
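A minimal standalone sketch of the per-dword routing rule that the rewritten
adreno_ringbuffer_extract() above applies while walking the ringbuffer; the
route_dword name below is illustrative only, not a driver function.

/* Sketch only: good-context dwords go to the replay buffer, hung-context
 * dwords go to the bad buffer, and every dword is also kept in the temp copy. */
#include <stdio.h>

static void route_dword(unsigned int val, int copy_rb_contents,
			unsigned int *good, int *good_idx,
			unsigned int *bad, int *bad_idx,
			unsigned int *temp, int *temp_idx)
{
	if (copy_rb_contents)
		good[(*good_idx)++] = val;
	else
		bad[(*bad_idx)++] = val;
	temp[(*temp_idx)++] = val;
}

int main(void)
{
	unsigned int good[4], bad[4], temp[8];
	int gi = 0, bi = 0, ti = 0;

	route_dword(0x11111111, 1, good, &gi, bad, &bi, temp, &ti);
	route_dword(0x22222222, 0, good, &gi, bad, &bi, temp, &ti);
	printf("good=%d bad=%d temp=%d\n", gi, bi, ti);	/* good=1 bad=1 temp=2 */
	return 0;
}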
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 7560848..fa03c05 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,16 +13,21 @@
#ifndef __ADRENO_RINGBUFFER_H
#define __ADRENO_RINGBUFFER_H
+/*
+ * Adreno ringbuffer sizes in bytes - these are converted to
+ * the appropriate log2 values in the code
+ */
#define KGSL_RB_SIZE (32 * 1024)
#define KGSL_RB_BLKSIZE 16
+/* CP timestamp register */
#define REG_CP_TIMESTAMP REG_SCRATCH_REG0
struct kgsl_device;
struct kgsl_device_private;
-struct adreno_recovery_data;
+struct adreno_ft_data;
#define GSL_RB_MEMPTRS_SCRATCH_COUNT 8
struct kgsl_rbmemptrs {
@@ -45,11 +50,11 @@
struct kgsl_memdesc memptrs_desc;
struct kgsl_rbmemptrs *memptrs;
-
+	/* ringbuffer size */
unsigned int sizedwords;
- unsigned int wptr;
- unsigned int rptr;
+ unsigned int wptr; /* write pointer offset in dwords from baseaddr */
+ unsigned int rptr; /* read pointer offset in dwords from baseaddr */
unsigned int timestamp[KGSL_MEMSTORE_MAX];
};
@@ -64,16 +69,23 @@
gpuaddr += sizeof(uint); \
} while (0)
+/* enable timestamp (...scratch0) memory shadowing */
#define GSL_RB_MEMPTRS_SCRATCH_MASK 0x1
-#define GSL_RB_CNTL_NO_UPDATE 0x0
+/* mem rptr */
+#define GSL_RB_CNTL_NO_UPDATE 0x0 /* enable */
#define GSL_RB_GET_READPTR(rb, data) \
do { \
*(data) = rb->memptrs->rptr; \
} while (0)
-#define GSL_RB_CNTL_POLL_EN 0x0
+#define GSL_RB_CNTL_POLL_EN 0x0 /* disable */
+/*
+ * protected mode error checking below register address 0x800
+ * note: if CP_INTERRUPT packet is used then checking needs
+ * to change to below register address 0x7C8
+ */
#define GSL_RB_PROTECTED_MODE_CONTROL 0x200001F2
int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
@@ -102,15 +114,16 @@
void kgsl_cp_intrcallback(struct kgsl_device *device);
-int adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
- struct adreno_recovery_data *rec_data);
+void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
+ struct adreno_ft_data *ft_data);
void
adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff,
int num_rb_contents);
unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
- unsigned int numcmds);
+ struct adreno_context *context,
+ unsigned int numcmds);
int adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device);
@@ -124,16 +137,18 @@
return rb->wptr + rb->sizedwords - rptr;
}
+/* Increment a value by 4 bytes with wrap-around based on size */
static inline unsigned int adreno_ringbuffer_inc_wrapped(unsigned int val,
unsigned int size)
{
return (val + sizeof(unsigned int)) % size;
}
+/* Decrement a value by 4 bytes with wrap-around based on size */
static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val,
unsigned int size)
{
return (val + size - sizeof(unsigned int)) % size;
}
-#endif
+#endif /* __ADRENO_RINGBUFFER_H */
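A minimal standalone sketch of the wrap-around dword stepping documented for
adreno_ringbuffer_inc_wrapped()/adreno_ringbuffer_dec_wrapped() above; the
inc_wrapped/dec_wrapped names below are illustrative only, not the driver's.

/* Sketch only: same arithmetic as the wrap-around helpers in the header. */
#include <stdio.h>

static unsigned int inc_wrapped(unsigned int val, unsigned int size)
{
	return (val + sizeof(unsigned int)) % size;	/* forward one dword */
}

static unsigned int dec_wrapped(unsigned int val, unsigned int size)
{
	return (val + size - sizeof(unsigned int)) % size;	/* back one dword */
}

int main(void)
{
	unsigned int size = 32;	/* an 8-dword ring, sized in bytes */

	printf("%u\n", inc_wrapped(28, size));	/* prints 0: wraps forward */
	printf("%u\n", dec_wrapped(0, size));	/* prints 28: wraps backward */
	return 0;
}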
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index a412c12..f23586e 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,13 +19,16 @@
#include "a2xx_reg.h"
#include "a3xx_reg.h"
+/* Number of dwords of ringbuffer history to record */
#define NUM_DWORDS_OF_RINGBUFFER_HISTORY 100
+/* Maintain a list of the objects we see during parsing */
#define SNAPSHOT_OBJ_BUFSIZE 64
#define SNAPSHOT_OBJ_TYPE_IB 0
+/* Keep track of how many bytes are frozen after a snapshot and tell the user */
static int snapshot_frozen_objsize;
static struct kgsl_snapshot_obj {
@@ -36,14 +39,22 @@
int dwords;
} objbuf[SNAPSHOT_OBJ_BUFSIZE];
+/* Pointer to the next open entry in the object list */
static int objbufptr;
+/* Push a new buffer object onto the list */
static void push_object(struct kgsl_device *device, int type, uint32_t ptbase,
uint32_t gpuaddr, int dwords)
{
int index;
void *ptr;
+ /*
+ * Sometimes IBs can be reused in the same dump. Because we parse from
+ * oldest to newest, if we come across an IB that has already been used,
+ * assume that it has been reused and update the list with the newest
+ * size.
+ */
for (index = 0; index < objbufptr; index++) {
if (objbuf[index].gpuaddr == gpuaddr &&
@@ -58,6 +69,10 @@
return;
}
+ /*
+ * adreno_convertaddr verifies that the IB size is valid - at least in
+	 * the context of it being smaller than the allocated memory space
+ */
ptr = adreno_convertaddr(device, ptbase, gpuaddr, dwords << 2);
if (ptr == NULL) {
@@ -66,7 +81,7 @@
return;
}
-
+ /* Put it on the list of things to parse */
objbuf[objbufptr].type = type;
objbuf[objbufptr].gpuaddr = gpuaddr;
objbuf[objbufptr].ptbase = ptbase;
@@ -74,6 +89,10 @@
objbuf[objbufptr++].ptr = ptr;
}
+/*
+ * Return 1 if the specified object is already on the list of buffers
+ * to be dumped
+ */
static int find_object(int type, unsigned int gpuaddr, unsigned int ptbase)
{
@@ -89,31 +108,64 @@
return 0;
}
+/*
+ * This structure keeps track of type0 writes to VSC_PIPE_DATA_ADDRESS_x and
+ * VSC_PIPE_DATA_LENGTH_x. When a draw initiator is called these registers
+ * point to buffers that we need to freeze for a snapshot
+ */
static struct {
unsigned int base;
unsigned int size;
} vsc_pipe[8];
+/*
+ * This is the cached value of type0 writes to the VSC_SIZE_ADDRESS which
+ * contains the buffer address of the visibility stream size buffer during a
+ * binning pass
+ */
static unsigned int vsc_size_address;
+/*
+ * This struct keeps track of type0 writes to VFD_FETCH_INSTR_0_X and
+ * VFD_FETCH_INSTR_1_X registers. When a draw initiator is called the addresses
+ * and sizes in these registers point to VBOs that we need to freeze for a
+ * snapshot
+ */
static struct {
unsigned int base;
unsigned int stride;
} vbo[16];
+/*
+ * This is the cached value of type0 writes to VFD_INDEX_MAX. This will be used
+ * to calculate the size of the VBOs when the draw initiator is called
+ */
static unsigned int vfd_index_max;
+/*
+ * This is the cached value of type0 writes to VFD_CONTROL_0 which tells us how
+ * many VBOs are active when the draw initiator is called
+ */
static unsigned int vfd_control_0;
+/*
+ * Cached value of type0 writes to SP_VS_PVT_MEM_ADDR and SP_FS_PVT_MEM_ADDR.
+ * This is a buffer that contains private stack information for the shader
+ */
static unsigned int sp_vs_pvt_mem_addr;
static unsigned int sp_fs_pvt_mem_addr;
+/*
+ * Each load state block has two possible types. Each type has a different
+ * number of dwords per unit. Use this handy lookup table to make sure
+ * we dump the right amount of data from the indirect buffer
+ */
static int load_state_unit_sizes[7][2] = {
{ 2, 4 },
@@ -125,15 +177,31 @@
{ 8, 2 },
};
-static void ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
+static int ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
unsigned int ptbase)
{
unsigned int block, source, type;
+ int ret = 0;
+ /*
+	 * The object here is to find indirect shaders, i.e. shaders loaded from
+ * GPU memory instead of directly in the command. These should be added
+ * to the list of memory objects to dump. So look at the load state
+ * if the block is indirect (source = 4). If so then add the memory
+ * address to the list. The size of the object differs depending on the
+ * type per the load_state_unit_sizes array above.
+ */
if (type3_pkt_size(pkt[0]) < 2)
- return;
+ return 0;
+ /*
+ * pkt[1] 18:16 - source
+ * pkt[1] 21:19 - state block
+ * pkt[1] 31:22 - size in units
+ * pkt[2] 0:1 - type
+ * pkt[2] 31:2 - GPU memory address
+ */
block = (pkt[1] >> 19) & 0x07;
source = (pkt[1] >> 16) & 0x07;
@@ -147,34 +215,52 @@
else
unitsize = load_state_unit_sizes[block][1];
-
+ /* Freeze the GPU buffer containing the shader */
ret = kgsl_snapshot_get_object(device, ptbase,
pkt[2] & 0xFFFFFFFC,
(((pkt[1] >> 22) & 0x03FF) * unitsize) << 2,
SNAPSHOT_GPU_OBJECT_SHADER);
+
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
}
+
+ return ret;
}
+/*
+ * This opcode sets the base addresses for the visibility stream buffer and the
+ * visibility stream size buffer.
+ */
-static void ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
+static int ib_parse_set_bin_data(struct kgsl_device *device, unsigned int *pkt,
unsigned int ptbase)
{
int ret;
if (type3_pkt_size(pkt[0]) < 2)
- return;
+ return 0;
-
+	/* Visibility stream buffer */
ret = kgsl_snapshot_get_object(device, ptbase, pkt[1], 0,
SNAPSHOT_GPU_OBJECT_GENERIC);
+
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
-
+	/* visibility stream size buffer (fixed size 8 dwords) */
ret = kgsl_snapshot_get_object(device, ptbase, pkt[2], 32,
SNAPSHOT_GPU_OBJECT_GENERIC);
- snapshot_frozen_objsize += ret;
+
+ if (ret >= 0)
+ snapshot_frozen_objsize += ret;
+
+ return ret;
}
/*
@@ -183,13 +269,13 @@
* buffers that are written to as frozen
*/
-static void ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
+static int ib_parse_mem_write(struct kgsl_device *device, unsigned int *pkt,
unsigned int ptbase)
{
int ret;
if (type3_pkt_size(pkt[0]) < 1)
- return;
+ return 0;
/*
* The address is where the data in the rest of this packet is written
@@ -201,52 +287,77 @@
ret = kgsl_snapshot_get_object(device, ptbase, pkt[1] & 0xFFFFFFFC, 0,
SNAPSHOT_GPU_OBJECT_GENERIC);
- snapshot_frozen_objsize += ret;
+ if (ret >= 0)
+ snapshot_frozen_objsize += ret;
+
+ return ret;
}
+/*
+ * The DRAW_INDX opcode sends a draw initiator which starts a draw operation in
+ * the GPU, so this is the point where all the registers and buffers become
+ * "valid". The DRAW_INDX may also have an index buffer pointer that should be
+ * frozen with the others
+ */
-static void ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
+static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
unsigned int ptbase)
{
- int ret, i;
+ int ret = 0, i;
if (type3_pkt_size(pkt[0]) < 3)
- return;
+ return 0;
-
+	/* DRAW_INDX may have an index buffer pointer */
if (type3_pkt_size(pkt[0]) > 3) {
ret = kgsl_snapshot_get_object(device, ptbase, pkt[4], pkt[5],
SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
}
+ /*
+ * All of the type0 writes are valid at a draw initiator, so freeze
+ * the various buffers that we are tracking
+ */
-
+	/* First up the visibility stream buffer */
for (i = 0; i < ARRAY_SIZE(vsc_pipe); i++) {
if (vsc_pipe[i].base != 0 && vsc_pipe[i].size != 0) {
ret = kgsl_snapshot_get_object(device, ptbase,
vsc_pipe[i].base, vsc_pipe[i].size,
SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
}
}
-
+ /* Next the visibility stream size buffer */
if (vsc_size_address) {
ret = kgsl_snapshot_get_object(device, ptbase,
vsc_size_address, 32,
SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
}
-
+ /* Next private shader buffer memory */
if (sp_vs_pvt_mem_addr) {
ret = kgsl_snapshot_get_object(device, ptbase,
sp_vs_pvt_mem_addr, 8192,
SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
sp_vs_pvt_mem_addr = 0;
}
@@ -255,16 +366,24 @@
ret = kgsl_snapshot_get_object(device, ptbase,
sp_fs_pvt_mem_addr, 8192,
SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
sp_fs_pvt_mem_addr = 0;
}
-
+ /* Finally: VBOs */
-
+	/* The number of active VBOs is stored in VFD_CONTROL_0[31:27] */
for (i = 0; i < (vfd_control_0) >> 27; i++) {
int size;
+ /*
+ * The size of the VBO is the stride stored in
+ * VFD_FETCH_INSTR_0_X.BUFSTRIDE * VFD_INDEX_MAX. The base
+ * is stored in VFD_FETCH_INSTR_1_X
+ */
if (vbo[i].base != 0) {
size = vbo[i].stride * vfd_index_max;
@@ -272,6 +391,9 @@
ret = kgsl_snapshot_get_object(device, ptbase,
vbo[i].base,
0, SNAPSHOT_GPU_OBJECT_GENERIC);
+ if (ret < 0)
+ return -EINVAL;
+
snapshot_frozen_objsize += ret;
}
@@ -281,26 +403,30 @@
vfd_control_0 = 0;
vfd_index_max = 0;
+
+ return ret;
}
+/*
+ * Parse all the type3 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab or a draw initiator
+ */
-static void ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
+static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
unsigned int ptbase)
{
- switch (cp_type3_opcode(*ptr)) {
- case CP_LOAD_STATE:
- ib_parse_load_state(device, ptr, ptbase);
- break;
- case CP_SET_BIN_DATA:
- ib_parse_set_bin_data(device, ptr, ptbase);
- break;
- case CP_MEM_WRITE:
- ib_parse_mem_write(device, ptr, ptbase);
- break;
- case CP_DRAW_INDX:
- ib_parse_draw_indx(device, ptr, ptbase);
- break;
- }
+ int opcode = cp_type3_opcode(*ptr);
+
+ if (opcode == CP_LOAD_STATE)
+ return ib_parse_load_state(device, ptr, ptbase);
+ else if (opcode == CP_SET_BIN_DATA)
+ return ib_parse_set_bin_data(device, ptr, ptbase);
+ else if (opcode == CP_MEM_WRITE)
+ return ib_parse_mem_write(device, ptr, ptbase);
+ else if (opcode == CP_DRAW_INDX)
+ return ib_parse_draw_indx(device, ptr, ptbase);
+
+ return 0;
}
/*
@@ -320,12 +446,23 @@
for (i = 0; i < size; i++, offset++) {
-
+		/* Visibility stream buffer */
if (offset >= A3XX_VSC_PIPE_DATA_ADDRESS_0 &&
offset <= A3XX_VSC_PIPE_DATA_LENGTH_7) {
int index = offset - A3XX_VSC_PIPE_DATA_ADDRESS_0;
+ /* Each bank of address and length registers are
+ * interleaved with an empty register:
+ *
+ * address 0
+ * length 0
+ * empty
+ * address 1
+ * length 1
+ * empty
+ * ...
+ */
if ((index % 3) == 0)
vsc_pipe[index / 3].base = ptr[i + 1];
@@ -335,6 +472,11 @@
(offset <= A3XX_VFD_FETCH_INSTR_1_F)) {
int index = offset - A3XX_VFD_FETCH_INSTR_0_0;
+ /*
+ * FETCH_INSTR_0_X and FETCH_INSTR_1_X banks are
+ * interleaved as above but without the empty register
+ * in between
+ */
if ((index % 2) == 0)
vbo[index >> 1].stride =
@@ -342,6 +484,10 @@
else
vbo[index >> 1].base = ptr[i + 1];
} else {
+ /*
+ * Cache various support registers for calculating
+ * buffer sizes
+ */
switch (offset) {
case A3XX_VFD_CONTROL_0:
@@ -364,26 +510,32 @@
}
}
+/* Add an IB as a GPU object, but first, parse it to find more goodies within */
-static void ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
+static int ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
unsigned int gpuaddr, unsigned int dwords)
{
int i, ret, rem = dwords;
unsigned int *src;
+ /*
+ * If the object is already in the list, we don't need to parse it again
+ */
if (kgsl_snapshot_have_object(device, ptbase, gpuaddr, dwords << 2))
- return;
+ return 0;
src = (unsigned int *) adreno_convertaddr(device, ptbase, gpuaddr,
dwords << 2);
if (src == NULL)
- return;
+ return -EINVAL;
for (i = 0; rem > 0; rem--, i++) {
int pktsize;
+		/* If the packet isn't a type 0 or a type 3, then don't bother
+ * parsing it - it is likely corrupted */
if (!pkt_is_type0(src[i]) && !pkt_is_type3(src[i]))
break;
@@ -394,11 +546,46 @@
break;
if (pkt_is_type3(src[i])) {
- if (adreno_cmd_is_ib(src[i]))
- ib_add_gpu_object(device, ptbase,
- src[i + 1], src[i + 2]);
- else
- ib_parse_type3(device, &src[i], ptbase);
+ if (adreno_cmd_is_ib(src[i])) {
+ unsigned int gpuaddr = src[i + 1];
+ unsigned int size = src[i + 2];
+ unsigned int ibbase;
+
+ /* Address of the last processed IB2 */
+ kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
+
+ /*
+ * If this is the last IB2 that was executed,
+ * then push it to make sure it goes into the
+ * static space
+ */
+
+ if (ibbase == gpuaddr)
+ push_object(device,
+ SNAPSHOT_OBJ_TYPE_IB, ptbase,
+ gpuaddr, size);
+ else {
+ ret = ib_add_gpu_object(device,
+ ptbase, gpuaddr, size);
+
+ /*
+ * If adding the IB failed then stop
+ * parsing
+ */
+ if (ret < 0)
+ goto done;
+ }
+ } else {
+ ret = ib_parse_type3(device, &src[i], ptbase);
+ /*
+ * If the parse function failed (probably
+ * because of a bad decode) then bail out and
+ * just capture the binary IB data
+ */
+
+ if (ret < 0)
+ goto done;
+ }
} else if (pkt_is_type0(src[i])) {
ib_parse_type0(device, &src[i], ptbase);
}
@@ -407,12 +594,17 @@
rem -= pktsize;
}
+done:
ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
SNAPSHOT_GPU_OBJECT_IB);
- snapshot_frozen_objsize += ret;
+ if (ret >= 0)
+ snapshot_frozen_objsize += ret;
+
+ return ret;
}
+/* Snapshot the istore memory */
static int snapshot_istore(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
@@ -437,6 +629,7 @@
return (count * 4) + sizeof(*header);
}
+/* Snapshot the ringbuffer memory */
static int snapshot_rb(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
@@ -448,15 +641,20 @@
int index, size, i;
int parse_ibs = 0, ib_parse_start;
-
+ /* Get the physical address of the MMU pagetable */
ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
-
+	/* Get the current read pointer for the RB */
kgsl_regread(device, REG_CP_RB_RPTR, &rptr);
-
+ /* Address of the last processed IB */
kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);
+ /*
+ * Figure out the window of ringbuffer data to dump. First we need to
+	 * find where the last processed IB was submitted. Start walking back
+ * from the rptr
+ */
index = rptr;
rbptr = rb->buffer_desc.hostptr;
@@ -467,7 +665,7 @@
if (index < 0) {
index = rb->sizedwords - 3;
-
+ /* We wrapped without finding what we wanted */
if (index < rb->wptr) {
index = rb->wptr;
break;
@@ -479,6 +677,11 @@
break;
} while (index != rb->wptr);
+ /*
+ * index points at the last submitted IB. We can only trust that the
+ * memory between the context switch and the hanging IB is valid, so
+ * the next step is to find the context switch before the submission
+ */
while (index != rb->wptr) {
index--;
@@ -486,6 +689,11 @@
if (index < 0) {
index = rb->sizedwords - 2;
+ /*
+ * Wrapped without finding the context switch. This is
+ * harmless - we should still have enough data to dump a
+ * valid state
+ */
if (index < rb->wptr) {
index = rb->wptr;
@@ -493,15 +701,23 @@
}
}
-
+ /* Break if the current packet is a context switch identifier */
if ((rbptr[index] == cp_nop_packet(1)) &&
(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
break;
}
+ /*
+ * Index represents the start of the window of interest. We will try
+ * to dump all buffers between here and the rptr
+ */
ib_parse_start = index;
+ /*
+ * Dump the entire ringbuffer - the parser can choose how much of it to
+ * process
+ */
size = (rb->sizedwords << 2);
@@ -511,18 +727,26 @@
return 0;
}
-
+ /* Write the sub-header for the section */
header->start = rb->wptr;
header->end = rb->wptr;
header->wptr = rb->wptr;
header->rbsize = rb->sizedwords;
header->count = rb->sizedwords;
+ /*
+ * Loop through the RB, copying the data and looking for indirect
+ * buffers and MMU pagetable changes
+ */
index = rb->wptr;
for (i = 0; i < rb->sizedwords; i++) {
*data = rbptr[index];
+ /*
+ * Only parse IBs between the start and the rptr or the next
+ * context switch, whichever comes first
+ */
if (parse_ibs == 0 && index == ib_parse_start)
parse_ibs = 1;
@@ -533,17 +757,27 @@
unsigned int ibaddr = rbptr[index + 1];
unsigned int ibsize = rbptr[index + 2];
+ /*
+			 * This will return non-NULL if the IB happens to be
+			 * part of the context memory (i.e. context switch
+ * command buffers)
+ */
struct kgsl_memdesc *memdesc =
adreno_find_ctxtmem(device, ptbase, ibaddr,
ibsize);
-
+			/* IOMMU uses a NOP IB placed in setstate memory */
if (NULL == memdesc)
if (kgsl_gpuaddr_in_memdesc(
&device->mmu.setstate_memory,
ibaddr, ibsize))
memdesc = &device->mmu.setstate_memory;
+ /*
+ * The IB from CP_IB1_BASE and the IBs for legacy
+			 * context switch go into the snapshot; all
+			 * others get marked as GPU objects
+ */
if (ibaddr == ibbase || memdesc != NULL)
push_object(device, SNAPSHOT_OBJ_TYPE_IB,
@@ -561,10 +795,11 @@
data++;
}
-
+ /* Return the size of the section */
return size + sizeof(*header);
}
+/* Snapshot the memory for an indirect buffer */
static int snapshot_ib(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
@@ -572,7 +807,7 @@
struct kgsl_snapshot_obj *obj = priv;
unsigned int *src = obj->ptr;
unsigned int *dst = snapshot + sizeof(*header);
- int i;
+ int i, ret;
if (remain < (obj->dwords << 2) + sizeof(*header)) {
KGSL_DRV_ERR(device,
@@ -580,12 +815,12 @@
return 0;
}
-
+ /* Write the sub-header for the section */
header->gpuaddr = obj->gpuaddr;
header->ptbase = obj->ptbase;
header->size = obj->dwords;
-
+ /* Write the contents of the ib */
for (i = 0; i < obj->dwords; i++, src++, dst++) {
*dst = *src;
@@ -596,14 +831,20 @@
if (adreno_cmd_is_ib(*src))
push_object(device, SNAPSHOT_OBJ_TYPE_IB,
obj->ptbase, src[1], src[2]);
- else
- ib_parse_type3(device, src, obj->ptbase);
+ else {
+ ret = ib_parse_type3(device, src, obj->ptbase);
+
+ /* Stop parsing if the type3 decode fails */
+ if (ret < 0)
+ break;
+ }
}
}
return (obj->dwords << 2) + sizeof(*header);
}
+/* Dump another item on the current pending list */
static void *dump_object(struct kgsl_device *device, int obj, void *snapshot,
int *remain)
{
@@ -623,6 +864,15 @@
return snapshot;
}
+/* adreno_snapshot - Snapshot the Adreno GPU state
+ * @device - KGSL device to snapshot
+ * @snapshot - Pointer to the start of memory to write into
+ * @remain - A pointer to how many bytes of memory are remaining in the snapshot
+ * @hang - set if this snapshot was automatically triggered by a GPU hang
+ * This is a hook function called by kgsl_snapshot to snapshot the
+ * Adreno specific information for the GPU snapshot. In turn, this function
+ * calls the GPU specific snapshot function to get core specific information.
+ */
void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
int hang)
@@ -631,12 +881,12 @@
uint32_t ptbase, ibbase, ibsize;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-
+ /* Reset the list of objects */
objbufptr = 0;
snapshot_frozen_objsize = 0;
-
+	/* Clear the caches for the visibility stream and VBO parsing */
vfd_control_0 = 0;
vfd_index_max = 0;
@@ -645,17 +895,30 @@
memset(vsc_pipe, 0, sizeof(vsc_pipe));
memset(vbo, 0, sizeof(vbo));
-
+ /* Get the physical address of the MMU pagetable */
ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
-
+ /* Dump the ringbuffer */
snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB,
snapshot, remain, snapshot_rb, NULL);
+ /*
+ * Make sure that the last IB1 that was being executed is dumped.
+ * Since this was the last IB1 that was processed, we should have
+ * already added it to the list during the ringbuffer parse but we
+ * want to be double plus sure.
+ */
kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);
kgsl_regread(device, REG_CP_IB1_BUFSZ, &ibsize);
+ /*
+ * The problem is that IB size from the register is the unprocessed size
+ * of the buffer not the original size, so if we didn't catch this
+ * buffer being directly used in the RB, then we might not be able to
+	 * dump the whole thing. Print a warning message so we can try to
+	 * figure out how often this really happens.
+ */
if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
@@ -667,15 +930,30 @@
kgsl_regread(device, REG_CP_IB2_BASE, &ibbase);
kgsl_regread(device, REG_CP_IB2_BUFSZ, &ibsize);
+ /*
+ * Add the last parsed IB2 to the list. The IB2 should be found as we
+ * parse the objects below, but we try to add it to the list first, so
+ * it too can be parsed. Don't print an error message in this case - if
+ * the IB2 is found during parsing, the list will be updated with the
+ * correct size.
+ */
if (!find_object(SNAPSHOT_OBJ_TYPE_IB, ibbase, ptbase) && ibsize) {
push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
ibbase, ibsize);
}
+ /*
+ * Go through the list of found objects and dump each one. As the IBs
+ * are parsed, more objects might be found, and objbufptr will increase
+ */
for (i = 0; i < objbufptr; i++)
snapshot = dump_object(device, i, snapshot, remain);
+ /*
+ * Only dump the istore on a hang - reading it on a running system
+	 * has a non-zero chance of hanging the GPU
+ */
if (hang) {
snapshot = kgsl_snapshot_add_section(device,
@@ -683,7 +961,7 @@
snapshot_istore, NULL);
}
-
+ /* Add GPU specific sections - registers mainly, but other stuff too */
if (adreno_dev->gpudev->snapshot)
snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
remain, hang);
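A minimal standalone sketch of the CP_LOAD_STATE field decode spelled out in
the ib_parse_load_state() comments above, using the same shifts and masks as
the patch; decode_load_state and struct load_state_fields are illustrative
names only.

/* Sketch only: extracts the CP_LOAD_STATE fields documented in the patch. */
#include <stdio.h>

struct load_state_fields {
	unsigned int source;	/* pkt[1] bits 18:16 */
	unsigned int block;	/* pkt[1] bits 21:19 */
	unsigned int units;	/* pkt[1] bits 31:22 */
	unsigned int type;	/* pkt[2] bits 1:0   */
	unsigned int gpuaddr;	/* pkt[2] bits 31:2  */
};

static struct load_state_fields decode_load_state(const unsigned int *pkt)
{
	struct load_state_fields f;

	f.source  = (pkt[1] >> 16) & 0x07;
	f.block   = (pkt[1] >> 19) & 0x07;
	f.units   = (pkt[1] >> 22) & 0x3FF;
	f.type    = pkt[2] & 0x03;
	f.gpuaddr = pkt[2] & 0xFFFFFFFC;
	return f;
}

int main(void)
{
	/* pkt[0] would be the type3 header; only pkt[1] and pkt[2] are decoded here */
	unsigned int pkt[3] = { 0, (4u << 16) | (2u << 19) | (16u << 22), 0x10000001 };
	struct load_state_fields f = decode_load_state(pkt);

	printf("src=%u block=%u units=%u type=%u addr=0x%x\n",
	       f.source, f.block, f.units, f.type, f.gpuaddr);
	return 0;
}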
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 55692a6..3582a41 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -25,7 +25,7 @@
#include <linux/rbtree.h>
#include <linux/ashmem.h>
#include <linux/major.h>
-#include <linux/ion.h>
+#include <linux/msm_ion.h>
#include <linux/io.h>
#include <mach/socinfo.h>
@@ -36,6 +36,7 @@
#include "kgsl_sharedmem.h"
#include "kgsl_device.h"
#include "kgsl_trace.h"
+#include "kgsl_sync.h"
#undef MODULE_PARAM_PREFIX
#define MODULE_PARAM_PREFIX "kgsl."
@@ -51,112 +52,15 @@
static struct ion_client *kgsl_ion_client;
+/* kgsl_get_mem_entry - get the mem_entry structure for the specified object
+ * @device - Pointer to the device structure
+ * @ptbase - the pagetable base of the object
+ * @gpuaddr - the GPU address of the object
+ * @size - Size of the region to search
+ */
-int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
- void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv,
- void *owner)
-{
- struct kgsl_event *event;
- struct list_head *n;
- unsigned int cur_ts;
- struct kgsl_context *context = NULL;
-
- if (cb == NULL)
- return -EINVAL;
-
- if (id != KGSL_MEMSTORE_GLOBAL) {
- context = idr_find(&device->context_idr, id);
- if (context == NULL)
- return -EINVAL;
- }
- cur_ts = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
-
-
-
- if (timestamp_cmp(cur_ts, ts) >= 0) {
- cb(device, priv, id, cur_ts);
- return 0;
- }
-
- event = kzalloc(sizeof(*event), GFP_KERNEL);
- if (event == NULL)
- return -ENOMEM;
-
- event->context = context;
- event->timestamp = ts;
- event->priv = priv;
- event->func = cb;
- event->owner = owner;
-
-
- for (n = device->events.next ; n != &device->events; n = n->next) {
- struct kgsl_event *e =
- list_entry(n, struct kgsl_event, list);
-
- if (e->context != context)
- continue;
-
- if (timestamp_cmp(e->timestamp, ts) > 0) {
- list_add(&event->list, n->prev);
- break;
- }
- }
-
- if (n == &device->events)
- list_add_tail(&event->list, &device->events);
-
- queue_work(device->work_queue, &device->ts_expired_ws);
- return 0;
-}
-EXPORT_SYMBOL(kgsl_add_event);
-
-static void kgsl_cancel_events_ctxt(struct kgsl_device *device,
- struct kgsl_context *context)
-{
- struct kgsl_event *event, *event_tmp;
- unsigned int id, cur;
-
- cur = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
- id = context->id;
-
- list_for_each_entry_safe(event, event_tmp, &device->events, list) {
- if (event->context != context)
- continue;
-
- if (event->func)
- event->func(device, event->priv, id, cur);
-
- list_del(&event->list);
- kfree(event);
- }
-}
-
-void kgsl_cancel_events(struct kgsl_device *device,
- void *owner)
-{
- struct kgsl_event *event, *event_tmp;
- unsigned int id, cur;
-
- list_for_each_entry_safe(event, event_tmp, &device->events, list) {
- if (event->owner != owner)
- continue;
-
- cur = kgsl_readtimestamp(device, event->context,
- KGSL_TIMESTAMP_RETIRED);
-
- id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
- if (event->func)
- event->func(device, event->priv, id, cur);
-
- list_del(&event->list);
- kfree(event);
- }
-}
-EXPORT_SYMBOL(kgsl_cancel_events);
-
-
-struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase,
- unsigned int gpuaddr, unsigned int size)
+struct kgsl_mem_entry *kgsl_get_mem_entry(struct kgsl_device *device,
+ unsigned int ptbase, unsigned int gpuaddr, unsigned int size)
{
struct kgsl_process_private *priv;
struct kgsl_mem_entry *entry;
@@ -164,7 +68,7 @@
mutex_lock(&kgsl_driver.process_mutex);
list_for_each_entry(priv, &kgsl_driver.process_list, list) {
- if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase))
+ if (!kgsl_mmu_pt_equal(&device->mmu, priv->pagetable, ptbase))
continue;
spin_lock(&priv->mem_lock);
entry = kgsl_sharedmem_find_region(priv, gpuaddr, size);
@@ -189,10 +93,8 @@
if (!entry)
KGSL_CORE_ERR("kzalloc(%d) failed\n", sizeof(*entry));
- else {
+ else
kref_init(&entry->refcount);
- entry->memdesc.handle = NULL;
- }
return entry;
}
@@ -200,36 +102,6 @@
unsigned int kgsl_get_alloc_size(int detailed)
{
unsigned int ret = 0;
- struct kgsl_process_private *private;
- int i = 0;
-
- ret = kgsl_driver.stats.page_alloc;
- printk("kgsl: kgsl_driver.stats.page_alloc = %u\n", kgsl_driver.stats.page_alloc);
- printk("kgsl: kgsl_driver.stats.page_alloc_kernel = %u\n", kgsl_driver.stats.page_alloc_kernel);
- printk("kgsl: kgsl_driver.stats.pre_alloc = %u\n", kgsl_driver.stats.pre_alloc);
- printk("kgsl: kgsl_driver.stats.pre_alloc_kernel = %u\n", kgsl_driver.stats.pre_alloc_kernel);
-
- if (!detailed)
- return ret;
-
- mutex_lock(&kgsl_driver.process_mutex);
-
- list_for_each_entry(private, &kgsl_driver.process_list, list) {
- printk("kgsl: below is going to list all memory info of pid:%d \n", private->pid);
- for (i = 0; i < KGSL_MEM_ENTRY_MAX; i++) {
- switch (i) {
- case KGSL_MEM_ENTRY_PAGE_ALLOC:
- if (private->stats[KGSL_MEM_ENTRY_PAGE_ALLOC].cur != 0)
- printk("kgsl: page alloc %d\n", private->stats[KGSL_MEM_ENTRY_PAGE_ALLOC].cur);
- break;
- case KGSL_MEM_ENTRY_PRE_ALLOC:
- if (private->stats[KGSL_MEM_ENTRY_PRE_ALLOC].cur != 0)
- printk("kgsl: pre alloc %d\n", private->stats[KGSL_MEM_ENTRY_PRE_ALLOC].cur);
- break;
- }
- }
- }
- mutex_unlock(&kgsl_driver.process_mutex);
return ret;
}
@@ -244,6 +116,11 @@
if (entry->memtype != KGSL_MEM_ENTRY_KERNEL)
kgsl_driver.stats.mapped -= entry->memdesc.size;
+ /*
+ * Ion takes care of freeing the sglist for us so
+ * clear the sg before freeing the sharedmem so kgsl_sharedmem_free
+ * doesn't try to free it again
+ */
if (entry->memtype == KGSL_MEM_ENTRY_ION) {
entry->memdesc.sg = NULL;
@@ -295,9 +172,9 @@
spin_unlock(&process->mem_lock);
entry->priv = process;
- entry->memdesc.private = process;
}
+/* Detach a memory entry from a process and unmap it from the MMU */
static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry)
{
@@ -312,6 +189,7 @@
kgsl_mem_entry_put(entry);
}
+/* Allocate a new context id */
static struct kgsl_context *
kgsl_create_context(struct kgsl_device_private *dev_priv)
@@ -343,7 +221,7 @@
return NULL;
}
-
+ /* MAX - 1, there is one memdesc in memstore for device info */
if (id >= KGSL_MEMSTORE_MAX) {
KGSL_DRV_ERR(dev_priv->device, "cannot have more than %d "
"ctxts due to memstore limitation\n",
@@ -357,9 +235,44 @@
context->id = id;
context->dev_priv = dev_priv;
+ if (kgsl_sync_timeline_create(context)) {
+ idr_remove(&dev_priv->device->context_idr, id);
+ goto func_end;
+ }
+
+ /* Initialize the pending event list */
+ INIT_LIST_HEAD(&context->events);
+
+ /*
+ * Initialize the node that is used to maintain the master list of
+ * contexts with pending events in the device structure. Normally we
+	 * wouldn't take the time to initialize a node but at event add time we
+ * call list_empty() on the node as a quick way of determining if the
+ * context is already in the master list so it needs to always be either
+ * active or in an unused but initialized state
+ */
+
+ INIT_LIST_HEAD(&context->events_list);
+
+func_end:
+ if (ret) {
+ kfree(context);
+ return NULL;
+ }
+
return context;
}
+/**
+ * kgsl_context_detach - Release the "master" context reference
+ * @context - The context that will be detached
+ *
+ * This is called when a context becomes unusable, because userspace
+ * has requested for it to be destroyed. The context itself may
+ * exist a bit longer until its reference count goes to zero.
+ * Other code referencing the context can detect that it has been
+ * detached because the context id will be set to KGSL_CONTEXT_INVALID.
+ */
void
kgsl_context_detach(struct kgsl_context *context)
{
@@ -373,8 +286,13 @@
if (device->ftbl->drawctxt_destroy)
device->ftbl->drawctxt_destroy(device, context);
-
+	/* device specific drawctxt_destroy MUST clean up devctxt */
BUG_ON(context->devctxt);
+ /*
+ * Cancel events after the device-specific context is
+ * destroyed, to avoid possibly freeing memory while
+ * it is still in use by the GPU.
+ */
kgsl_cancel_events_ctxt(device, context);
idr_remove(&device->context_idr, id);
context->id = KGSL_CONTEXT_INVALID;
@@ -386,45 +304,17 @@
{
struct kgsl_context *context = container_of(kref, struct kgsl_context,
refcount);
+ kgsl_sync_timeline_destroy(context);
kfree(context);
}
-void kgsl_timestamp_expired(struct work_struct *work)
-{
- struct kgsl_device *device = container_of(work, struct kgsl_device,
- ts_expired_ws);
- struct kgsl_event *event, *event_tmp;
- uint32_t ts_processed;
- unsigned int id;
-
- mutex_lock(&device->mutex);
-
-
- list_for_each_entry_safe(event, event_tmp, &device->events, list) {
- ts_processed = kgsl_readtimestamp(device, event->context,
- KGSL_TIMESTAMP_RETIRED);
- if (timestamp_cmp(ts_processed, event->timestamp) < 0)
- continue;
-
- id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
-
- if (event->func)
- event->func(device, event->priv, id, ts_processed);
-
- list_del(&event->list);
- kfree(event);
- }
-
- mutex_unlock(&device->mutex);
-}
-EXPORT_SYMBOL(kgsl_timestamp_expired);
-
static void kgsl_check_idle_locked(struct kgsl_device *device)
{
if (device->pwrctrl.nap_allowed == true &&
device->state == KGSL_STATE_ACTIVE &&
device->requested_state == KGSL_STATE_NONE) {
kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
+ kgsl_pwrscale_idle(device);
if (kgsl_pwrctrl_sleep(device) != 0)
mod_timer(&device->idle_timer,
jiffies +
@@ -519,22 +409,24 @@
policy_saved = device->pwrscale.policy;
device->pwrscale.policy = NULL;
kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
+	/* Make sure no user process is waiting for a timestamp
+	 * before suspending */
if (device->active_cnt != 0) {
mutex_unlock(&device->mutex);
wait_for_completion(&device->suspend_gate);
mutex_lock(&device->mutex);
}
-
+ /* Don't let the timer wake us during suspended sleep. */
del_timer_sync(&device->idle_timer);
switch (device->state) {
case KGSL_STATE_INIT:
break;
case KGSL_STATE_ACTIVE:
-
+ /* Wait for the device to become idle */
device->ftbl->idle(device);
case KGSL_STATE_NAP:
case KGSL_STATE_SLEEP:
-
+ /* Get the completion ready to be waited upon. */
INIT_COMPLETION(device->hwaccess_gate);
device->ftbl->suspend_context(device);
device->ftbl->stop(device);
@@ -566,19 +458,16 @@
{
int status = -EINVAL;
- if (!device) {
- printk("kgsl_resume_device: device is null!\n");
+ if (!device)
return -EINVAL;
- }
+
KGSL_PWR_WARN(device, "resume start\n");
mutex_lock(&device->mutex);
if (device->state == KGSL_STATE_SUSPEND) {
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
status = 0;
complete_all(&device->hwaccess_gate);
- }else
- printk(" kgsl_resume_device: state=%d\n", device->state);
-
+ }
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
mutex_unlock(&device->mutex);
@@ -663,13 +552,11 @@
}
EXPORT_SYMBOL(kgsl_late_resume_driver);
+/* file operations */
static struct kgsl_process_private *
kgsl_get_process_private(struct kgsl_device_private *cur_dev_priv)
{
struct kgsl_process_private *private;
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- int i;
-#endif
mutex_lock(&kgsl_driver.process_mutex);
list_for_each_entry(private, &kgsl_driver.process_list, list) {
@@ -679,7 +566,7 @@
}
}
-
+ /* no existing process private found for this dev_priv, create one */
private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL);
if (private == NULL) {
KGSL_DRV_ERR(cur_dev_priv->device, "kzalloc(%d) failed\n",
@@ -687,15 +574,6 @@
goto out;
}
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- private->gputime.total = 0;
- private->gputime.busy = 0;
- for(i=0;i<KGSL_MAX_PWRLEVELS;i++) {
- private->gputime_in_state[i].total = 0;
- private->gputime_in_state[i].busy = 0;
- }
-#endif
-
spin_lock_init(&private->mem_lock);
private->refcnt = 1;
private->pid = task_tgid_nr(current);
@@ -717,6 +595,7 @@
list_add(&private->list, &kgsl_driver.process_list);
kgsl_process_init_sysfs(private);
+ kgsl_process_init_debugfs(private);
out:
mutex_unlock(&kgsl_driver.process_mutex);
@@ -739,6 +618,7 @@
goto unlock;
kgsl_process_uninit_sysfs(private);
+ debugfs_remove_recursive(private->debug_root);
list_del(&private->list);
@@ -779,6 +659,12 @@
next = next + 1;
}
+ /*
+ * Clean up any to-be-freed entries that belong to this
+	 * process and this device. This is done after the contexts
+ * are destroyed to avoid possibly freeing memory while
+ * it is still in use by the GPU.
+ */
kgsl_cancel_events(device, dev_priv);
device->open_count--;
@@ -831,7 +717,7 @@
dev_priv->device = device;
filep->private_data = dev_priv;
-
+ /* Get file (per process) private struct */
dev_priv->process_priv = kgsl_get_process_private(dev_priv);
if (dev_priv->process_priv == NULL) {
result = -ENOMEM;
@@ -872,6 +758,7 @@
return result;
}
+/* call with private->mem_lock locked */
struct kgsl_mem_entry *
kgsl_sharedmem_find_region(struct kgsl_process_private *private,
unsigned int gpuaddr, size_t size)
@@ -904,12 +791,14 @@
}
EXPORT_SYMBOL(kgsl_sharedmem_find_region);
+/* call with private->mem_lock locked */
static inline struct kgsl_mem_entry *
kgsl_sharedmem_find(struct kgsl_process_private *private, unsigned int gpuaddr)
{
return kgsl_sharedmem_find_region(private, gpuaddr, 1);
}
+/* call all ioctl sub functions with driver locked */
static long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
@@ -937,7 +826,7 @@
}
case KGSL_PROP_GPU_RESET_STAT:
{
-
+ /* Return reset status of given context and clear it */
uint32_t id;
struct kgsl_context *context;
@@ -945,7 +834,7 @@
result = -EINVAL;
break;
}
-
+ /* We expect the value passed in to contain the context id */
if (copy_from_user(&id, param->value,
sizeof(unsigned int))) {
result = -EFAULT;
@@ -956,12 +845,16 @@
result = -EINVAL;
break;
}
+ /*
+ * Copy the reset status to value which also serves as
+ * the out parameter
+ */
if (copy_to_user(param->value, &(context->reset_status),
sizeof(unsigned int))) {
result = -EFAULT;
break;
}
-
+			/* Clear reset status once it has been queried */
context->reset_status = KGSL_CTX_STAT_NO_ERROR;
break;
}
@@ -979,7 +872,7 @@
unsigned int cmd, void *data)
{
int result = 0;
-
+ /* The getproperty struct is reused for setproperty too */
struct kgsl_device_getproperty *param = data;
if (dev_priv->device->ftbl->setproperty)
@@ -999,7 +892,7 @@
struct kgsl_device *device = dev_priv->device;
unsigned int context_id = context ? context->id : KGSL_MEMSTORE_GLOBAL;
-
+ /* Set the active count so that suspend doesn't do the wrong thing */
device->active_cnt++;
@@ -1016,11 +909,8 @@
KGSL_TIMESTAMP_RETIRED),
result);
-
-
- INIT_COMPLETION(dev_priv->device->suspend_gate);
- dev_priv->device->active_cnt--;
- complete(&dev_priv->device->suspend_gate);
+ /* Fire off any pending suspend operations that are in flight */
+ kgsl_active_count_put(dev_priv->device);
return result;
}
@@ -1049,6 +939,11 @@
param->context_id);
return -EINVAL;
}
+ /*
+ * A reference count is needed here, because waittimestamp may
+ * block with the device mutex unlocked and userspace could
+ * request for the context to be destroyed during that time.
+ */
kgsl_context_get(context);
result = _device_waittimestamp(dev_priv, context,
param->timestamp, param->timeout);
@@ -1085,6 +980,19 @@
goto done;
}
+ /*
+ * Put a reasonable upper limit on the number of IBs that can be
+ * submitted
+ */
+
+ if (param->numibs > 10000) {
+ KGSL_DRV_ERR(dev_priv->device,
+ "Too many IBs submitted. count: %d max 10000\n",
+ param->numibs);
+ result = -EINVAL;
+ goto done;
+ }
+
ibdesc = kzalloc(sizeof(struct kgsl_ibdesc) * param->numibs,
GFP_KERNEL);
if (!ibdesc) {
@@ -1105,6 +1013,8 @@
} else {
KGSL_DRV_INFO(dev_priv->device,
"Using single IB submission mode for ib submission\n");
+		/* If the user space driver is still using the old mode of
+		 * submitting a single IB then we need to support that as well */
ibdesc = kzalloc(sizeof(struct kgsl_ibdesc), GFP_KERNEL);
if (!ibdesc) {
KGSL_MEM_ERR(dev_priv->device,
@@ -1332,97 +1242,6 @@
return vma;
}
-static long
-kgsl_ioctl_sharedmem_from_vmalloc(struct kgsl_device_private *dev_priv,
- unsigned int cmd, void *data)
-{
- int result = 0, len = 0;
- struct kgsl_process_private *private = dev_priv->process_priv;
- struct kgsl_sharedmem_from_vmalloc *param = data;
- struct kgsl_mem_entry *entry = NULL;
- struct vm_area_struct *vma;
-
- KGSL_DEV_ERR_ONCE(dev_priv->device, "IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC"
- " is deprecated\n");
- if (!kgsl_mmu_enabled())
- return -ENODEV;
-
- if (!param->hostptr) {
- KGSL_CORE_ERR("invalid hostptr %x\n", param->hostptr);
- result = -EINVAL;
- goto error;
- }
-
- vma = kgsl_get_vma_from_start_addr(param->hostptr);
- if (!vma) {
- result = -EINVAL;
- goto error;
- }
-
- if (param->gpuaddr != 0) {
- len = param->gpuaddr;
- } else {
- if (vma->vm_pgoff || (param->hostptr != vma->vm_start)) {
- KGSL_CORE_ERR("VMA region does not match hostaddr\n");
- result = -EINVAL;
- goto error;
- }
-
- len = vma->vm_end - vma->vm_start;
- }
-
-
- if (len == 0 || param->hostptr + len > vma->vm_end) {
- KGSL_CORE_ERR("Invalid memory allocation length %d\n", len);
- result = -EINVAL;
- goto error;
- }
-
- entry = kgsl_mem_entry_create();
- if (entry == NULL) {
- result = -ENOMEM;
- goto error;
- }
-
- result = kgsl_sharedmem_page_alloc_user(&entry->memdesc,
- private,
- private->pagetable, len,
- param->flags);
- if (result != 0)
- goto error_free_entry;
-
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
- result = kgsl_sharedmem_map_vma(vma, &entry->memdesc);
- if (result) {
- KGSL_CORE_ERR("kgsl_sharedmem_map_vma failed: %d\n", result);
- goto error_free_alloc;
- }
-
- param->gpuaddr = entry->memdesc.gpuaddr;
-
- entry->memtype = KGSL_MEM_ENTRY_KERNEL;
-
- kgsl_mem_entry_attach_process(entry, private);
-
- trace_kgsl_mem_alloc(entry);
-
- kgsl_process_add_stats(private, entry->memtype, len);
-
- kgsl_check_idle(dev_priv->device);
- return 0;
-
-error_free_alloc:
- kgsl_sharedmem_free(&entry->memdesc);
-
-error_free_entry:
- kfree(entry);
-
-error:
- kgsl_check_idle(dev_priv->device);
- return result;
-}
-
static inline int _check_region(unsigned long start, unsigned long size,
uint64_t len)
{
@@ -1483,20 +1302,35 @@
ret = -ERANGE;
- if (phys == 0)
- goto err;
-
- if ((len & ~PAGE_MASK) ||
- (offset & ~PAGE_MASK) ||
- (size & ~PAGE_MASK)) {
- KGSL_CORE_ERR("length offset or size is not page aligned\n");
+ if (phys == 0) {
+ KGSL_CORE_ERR("kgsl_get_phys_file returned phys=0\n");
goto err;
}
-
- if (offset >= len || size > len)
+ /* Make sure the length of the region, the offset and the desired
+ * size are all page aligned or bail
+ */
+ if ((len & ~PAGE_MASK) ||
+ (offset & ~PAGE_MASK) ||
+ (size & ~PAGE_MASK)) {
+ KGSL_CORE_ERR("length %lu, offset %u or size %u "
+ "is not page aligned\n",
+ len, offset, size);
goto err;
+ }
+ /* The size or offset can never be greater than the PMEM length */
+ if (offset >= len || size > len) {
+ KGSL_CORE_ERR("offset %u or size %u "
+ "exceeds pmem length %lu\n",
+ offset, size, len);
+ goto err;
+ }
+
+ /* If size is 0, then adjust it to default to the size of the region
+ * minus the offset. If size isn't zero, then make sure that it will
+ * fit inside of the region.
+ */
if (size == 0)
size = len - offset;
@@ -1535,6 +1369,8 @@
return -ENOMEM;
memdesc->sglen = sglen;
+ memdesc->sglen_alloc = sglen;
+
sg_init_table(memdesc->sg, sglen);
spin_lock(¤t->mm->page_table_lock);
@@ -1593,7 +1429,7 @@
return -EINVAL;
}
-
+ /* We don't necessarily start at vma->vm_start */
len = vma->vm_end - (unsigned long) hostptr;
if (offset >= len)
@@ -1610,7 +1446,7 @@
if (size == 0)
size = len;
-
+ /* Adjust the size of the region to account for the offset */
size += offset & ~PAGE_MASK;
size = ALIGN(size, PAGE_SIZE);
@@ -1726,7 +1562,7 @@
entry->memdesc.sg = sg_table->sgl;
-
+ /* Calculate the size of the memdesc from the sglist */
entry->memdesc.sglen = 0;
@@ -1760,6 +1596,8 @@
else
memtype = param->memtype;
+ entry->memdesc.flags = param->flags;
+
switch (memtype) {
case KGSL_USER_MEM_TYPE_PMEM:
if (param->fd == 0 || param->len == 0)
@@ -1819,6 +1657,11 @@
if (result)
goto error;
+ if (entry->memdesc.size >= SZ_1M)
+ kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M));
+ else if (entry->memdesc.size >= SZ_64K)
+ kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64));
+
result = kgsl_mmu_map(private->pagetable,
&entry->memdesc,
GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
@@ -1826,7 +1669,7 @@
if (result)
goto error_put_file_ptr;
-
+ /* Adjust the returned value for a non 4k aligned offset */
param->gpuaddr = entry->memdesc.gpuaddr + (param->offset & ~PAGE_MASK);
KGSL_STATS_ADD(param->len, kgsl_driver.stats.mapped,
@@ -1859,6 +1702,8 @@
return result;
}
+/* This function flushes a graphics memory allocation from the CPU cache
+ * when caching is enabled with the MMU */
static long
kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
@@ -1900,7 +1745,7 @@
if (entry == NULL)
return -ENOMEM;
- result = kgsl_allocate_user(&entry->memdesc, private, private->pagetable,
+ result = kgsl_allocate_user(&entry->memdesc, private->pagetable,
param->size, param->flags);
if (result == 0) {
@@ -1953,6 +1798,15 @@
struct genlock *lock;
};
+/**
+ * kgsl_genlock_event_cb - Event callback for a genlock timestamp event
+ * @device - The KGSL device that expired the timestamp
+ * @priv - private data for the event
+ * @context_id - the context id that goes with the timestamp
+ * @timestamp - the timestamp that triggered the event
+ *
+ * Release a genlock lock following the expiration of a timestamp
+ */
static void kgsl_genlock_event_cb(struct kgsl_device *device,
void *priv, u32 context_id, u32 timestamp)
@@ -1969,6 +1823,18 @@
kfree(ev);
}
+/**
+ * kgsl_add_genlock_event - Create a new genlock event
+ * @device - KGSL device to create the event on
+ * @timestamp - Timestamp to trigger the event
+ * @data - User space buffer containing struct kgsl_genlock_event_priv
+ * @len - length of the userspace buffer
+ * @owner - driver instance that owns this event
+ * @returns 0 on success or error code on error
+ *
+ * Attach to a genlock handle and register an event to release the
+ * genlock lock when the timestamp expires
+ */
static int kgsl_add_genlock_event(struct kgsl_device *device,
u32 context_id, u32 timestamp, void __user *data, int len,
@@ -2013,6 +1879,13 @@
}
#endif
+/**
+ * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace
+ * @dev_priv - pointer to the private device structure
+ * @cmd - the ioctl cmd passed from kgsl_ioctl
+ * @data - the user data buffer from kgsl_ioctl
+ * @returns 0 on success or error code on failure
+ */
static long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
@@ -2026,6 +1899,11 @@
param->context_id, param->timestamp, param->priv,
param->len, dev_priv);
break;
+ case KGSL_TIMESTAMP_EVENT_FENCE:
+ ret = kgsl_add_fence_event(dev_priv->device,
+ param->context_id, param->timestamp, param->priv,
+ param->len, dev_priv);
+ break;
default:
ret = -EINVAL;
}
@@ -2036,42 +1914,54 @@
typedef long (*kgsl_ioctl_func_t)(struct kgsl_device_private *,
unsigned int, void *);
-#define KGSL_IOCTL_FUNC(_cmd, _func, _lock) \
- [_IOC_NR(_cmd)] = { .cmd = _cmd, .func = _func, .lock = _lock }
+#define KGSL_IOCTL_FUNC(_cmd, _func, _flags) \
+ [_IOC_NR((_cmd))] = \
+ { .cmd = (_cmd), .func = (_func), .flags = (_flags) }
+
+#define KGSL_IOCTL_LOCK BIT(0)
+#define KGSL_IOCTL_WAKE BIT(1)
static const struct {
unsigned int cmd;
kgsl_ioctl_func_t func;
- int lock;
+ int flags;
} kgsl_ioctl_funcs[] = {
KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
- kgsl_ioctl_device_getproperty, 1),
+ kgsl_ioctl_device_getproperty,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP,
- kgsl_ioctl_device_waittimestamp, 1),
+ kgsl_ioctl_device_waittimestamp,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
- kgsl_ioctl_device_waittimestamp_ctxtid, 1),
+ kgsl_ioctl_device_waittimestamp_ctxtid,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
- kgsl_ioctl_rb_issueibcmds, 1),
+ kgsl_ioctl_rb_issueibcmds,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP,
- kgsl_ioctl_cmdstream_readtimestamp, 1),
+ kgsl_ioctl_cmdstream_readtimestamp,
+ KGSL_IOCTL_LOCK),
KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID,
- kgsl_ioctl_cmdstream_readtimestamp_ctxtid, 1),
+ kgsl_ioctl_cmdstream_readtimestamp_ctxtid,
+ KGSL_IOCTL_LOCK),
KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP,
- kgsl_ioctl_cmdstream_freememontimestamp, 1),
+ kgsl_ioctl_cmdstream_freememontimestamp,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID,
- kgsl_ioctl_cmdstream_freememontimestamp_ctxtid, 1),
+ kgsl_ioctl_cmdstream_freememontimestamp_ctxtid,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE,
- kgsl_ioctl_drawctxt_create, 1),
+ kgsl_ioctl_drawctxt_create,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
- kgsl_ioctl_drawctxt_destroy, 1),
+ kgsl_ioctl_drawctxt_destroy,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM,
kgsl_ioctl_map_user_mem, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM,
kgsl_ioctl_map_user_mem, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE,
kgsl_ioctl_sharedmem_free, 0),
- KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC,
- kgsl_ioctl_sharedmem_from_vmalloc, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE,
kgsl_ioctl_sharedmem_flush_cache, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC,
@@ -2081,9 +1971,11 @@
KGSL_IOCTL_FUNC(IOCTL_KGSL_CFF_USER_EVENT,
kgsl_ioctl_cff_user_event, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT,
- kgsl_ioctl_timestamp_event, 1),
+ kgsl_ioctl_timestamp_event,
+ KGSL_IOCTL_LOCK),
KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY,
- kgsl_ioctl_device_setproperty, 1),
+ kgsl_ioctl_device_setproperty,
+ KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE)
};
static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -2091,17 +1983,21 @@
struct kgsl_device_private *dev_priv = filep->private_data;
unsigned int nr;
kgsl_ioctl_func_t func;
- int lock, ret;
+ int lock, ret, use_hw;
char ustack[64];
void *uptr = NULL;
BUG_ON(dev_priv == NULL);
+	/* Workaround for a previously incorrectly defined ioctl code.
+	This helps ensure binary compatibility */
if (cmd == IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_OLD)
cmd = IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP;
else if (cmd == IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_OLD)
cmd = IOCTL_KGSL_CMDSTREAM_READTIMESTAMP;
+ else if (cmd == IOCTL_KGSL_TIMESTAMP_EVENT_OLD)
+ cmd = IOCTL_KGSL_TIMESTAMP_EVENT;
nr = _IOC_NR(cmd);
@@ -2131,6 +2027,10 @@
if (nr < ARRAY_SIZE(kgsl_ioctl_funcs) &&
kgsl_ioctl_funcs[nr].func != NULL) {
+ /*
+ * Make sure that nobody tried to send us a malformed ioctl code
+ * with a valid NR but bogus flags
+ */
if (kgsl_ioctl_funcs[nr].cmd != cmd) {
KGSL_DRV_ERR(dev_priv->device,
@@ -2140,7 +2040,8 @@
}
func = kgsl_ioctl_funcs[nr].func;
- lock = kgsl_ioctl_funcs[nr].lock;
+ lock = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_LOCK;
+ use_hw = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_WAKE;
} else {
func = dev_priv->device->ftbl->ioctl;
if (!func) {
@@ -2150,11 +2051,13 @@
goto done;
}
lock = 1;
+ use_hw = 1;
}
if (lock) {
mutex_lock(&dev_priv->device->mutex);
- kgsl_check_suspended(dev_priv->device);
+ if (use_hw)
+ kgsl_check_suspended(dev_priv->device);
}
ret = func(dev_priv, cmd, uptr);
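A minimal sketch of how an entry in the table above uses the new flags (IOCTL_KGSL_EXAMPLE and kgsl_ioctl_example are hypothetical names for illustration, not part of this patch):

	/* Hypothetical table entry: the handler runs with device->mutex held
	 * (KGSL_IOCTL_LOCK) and, because KGSL_IOCTL_WAKE is also set, with the
	 * hardware brought out of nap/sleep first via kgsl_check_suspended().
	 * An entry with flags == 0 gets neither.
	 */
	KGSL_IOCTL_FUNC(IOCTL_KGSL_EXAMPLE,
			kgsl_ioctl_example,
			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),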
@@ -2182,7 +2085,8 @@
struct kgsl_memdesc *memdesc = &device->memstore;
int result;
unsigned int vma_size = vma->vm_end - vma->vm_start;
-
+
+ /* The memstore can only be mapped as read only */
if (vma->vm_flags & VM_WRITE)
return -EPERM;
@@ -2195,7 +2099,8 @@
vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
- result = remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+ result = remap_pfn_range(vma, vma->vm_start,
+ device->memstore.physaddr >> PAGE_SHIFT,
vma_size, vma->vm_page_prot);
if (result != 0)
KGSL_MEM_ERR(device, "remap_pfn_range failed: %d\n",
@@ -2204,6 +2109,10 @@
return result;
}
+/*
+ * kgsl_gpumem_vm_open is called whenever a vma region is copied or split.
+ * Increase the refcount to make sure that the accounting stays correct
+ */
static void kgsl_gpumem_vm_open(struct vm_area_struct *vma)
{
@@ -2242,14 +2151,13 @@
struct kgsl_process_private *private = dev_priv->process_priv;
struct kgsl_mem_entry *entry = NULL;
struct kgsl_device *device = dev_priv->device;
- int i;
-
+	/* Handle legacy behavior for memstore */
- if (vma_offset == device->memstore.physaddr)
+ if (vma_offset == device->memstore.gpuaddr)
return kgsl_mmap_memstore(device, vma);
-
+ /* Find a chunk of GPU memory */
spin_lock(&private->mem_lock);
entry = kgsl_sharedmem_find(private, vma_offset);
@@ -2274,21 +2182,6 @@
vma->vm_ops = &kgsl_gpumem_vm_ops;
vma->vm_file = file;
-
- if(entry->memdesc.handle == NULL)
- {
- for(i = 0; (i*PAGE_SIZE) < (vma->vm_end - vma->vm_start); i++)
- {
- vm_insert_page(vma, vma->vm_start + i*PAGE_SIZE, sg_page(&entry->memdesc.sg[i]));
- }
- }
- else
- {
- remap_pfn_range(vma, vma->vm_start, __phys_to_pfn(entry->memdesc.sg[0].dma_address),
- entry->memdesc.size, vma->vm_page_prot);
- }
-
-
return 0;
}
@@ -2337,7 +2230,7 @@
int minor, ret;
dev_t dev;
-
+ /* Find a minor for the device */
mutex_lock(&kgsl_driver.devlock);
for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) {
@@ -2353,7 +2246,7 @@
return -ENODEV;
}
-
+ /* Create the device */
dev = MKDEV(MAJOR(kgsl_driver.major), minor);
device->dev = device_create(kgsl_driver.class,
device->parentdev,
@@ -2376,7 +2269,6 @@
int kgsl_device_platform_probe(struct kgsl_device *device)
{
int result;
- int i;
int status = -EINVAL;
struct resource *res;
struct platform_device *pdev =
@@ -2386,20 +2278,7 @@
if (status)
return status;
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- device->current_process_priv = NULL;
-#endif
-
-
- device->gputime.total = 0;
- device->gputime.busy = 0;
- for(i=0;i<KGSL_MAX_PWRLEVELS;i++) {
- device->gputime_in_state[i].total = 0;
- device->gputime_in_state[i].busy = 0;
- }
-
-
-
+ /* Initialize logging first, so that failures below actually print. */
kgsl_device_debugfs_init(device);
status = kgsl_pwrctrl_init(device);
@@ -2440,7 +2319,7 @@
status = -ENODEV;
goto error_pwrctrl_close;
}
-
+	/* Acquire interrupt */
device->pwrctrl.interrupt_num =
platform_get_irq_byname(pdev, device->pwrctrl.irq_name);
@@ -2495,10 +2374,10 @@
pm_qos_add_request(&device->pm_qos_req_dma, PM_QOS_CPU_DMA_LATENCY,
PM_QOS_DEFAULT_VALUE);
-
+	/* Initialize the snapshot engine */
kgsl_device_snapshot_init(device);
-
+ /* Initialize common sysfs entries */
kgsl_pwrctrl_init_sysfs(device);
return 0;
@@ -2516,6 +2395,80 @@
}
EXPORT_SYMBOL(kgsl_device_platform_probe);
+int kgsl_postmortem_dump(struct kgsl_device *device, int manual)
+{
+ bool saved_nap;
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+ BUG_ON(device == NULL);
+
+ kgsl_cffdump_hang(device->id);
+
+ /* For a manual dump, make sure that the system is idle */
+
+ if (manual) {
+ if (device->active_cnt != 0) {
+ mutex_unlock(&device->mutex);
+ wait_for_completion(&device->suspend_gate);
+ mutex_lock(&device->mutex);
+ }
+
+ if (device->state == KGSL_STATE_ACTIVE)
+ kgsl_idle(device);
+
+ }
+
+ if (device->pm_dump_enable) {
+
+ KGSL_LOG_DUMP(device,
+ "POWER: FLAGS = %08lX | ACTIVE POWERLEVEL = %08X",
+ pwr->power_flags, pwr->active_pwrlevel);
+
+ KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
+ pwr->interval_timeout);
+
+ }
+
+ /* Disable the idle timer so we don't get interrupted */
+ del_timer_sync(&device->idle_timer);
+ mutex_unlock(&device->mutex);
+ flush_workqueue(device->work_queue);
+ mutex_lock(&device->mutex);
+
+	/* Turn off napping to make sure we have the clocks' full
+	attention through the following process */
+ saved_nap = device->pwrctrl.nap_allowed;
+ device->pwrctrl.nap_allowed = false;
+
+ /* Force on the clocks */
+ kgsl_pwrctrl_wake(device);
+
+ /* Disable the irq */
+ kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+	/* Call the device specific postmortem dump function */
+ device->ftbl->postmortem_dump(device, manual);
+
+ /* Restore nap mode */
+ device->pwrctrl.nap_allowed = saved_nap;
+
+ /* On a manual trigger, turn on the interrupts and put
+ the clocks to sleep. They will recover themselves
+ on the next event. For a hang, leave things as they
+ are until fault tolerance kicks in. */
+
+ if (manual) {
+ kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+
+ /* try to go into a sleep mode until the next event */
+ kgsl_pwrctrl_request_state(device, KGSL_STATE_SLEEP);
+ kgsl_pwrctrl_sleep(device);
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL(kgsl_postmortem_dump);
+
void kgsl_device_platform_remove(struct kgsl_device *device)
{
kgsl_device_snapshot_close(device);
@@ -2560,6 +2513,12 @@
kgsl_cffdump_destroy();
kgsl_core_debugfs_close();
+ /*
+ * We call kgsl_sharedmem_uninit_sysfs() and device_unregister()
+ * only if kgsl_driver.virtdev has been populated.
+ * We check at least one member of kgsl_driver.virtdev to
+ * see if it is not NULL (and thus, has been populated).
+ */
if (kgsl_driver.virtdev.class) {
kgsl_sharedmem_uninit_sysfs();
device_unregister(&kgsl_driver.virtdev);
@@ -2576,7 +2535,7 @@
static int __init kgsl_core_init(void)
{
int result = 0;
-
+ /* alloc major and minor device numbers */
result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX,
KGSL_NAME);
if (result < 0) {
@@ -2604,6 +2563,8 @@
goto err;
}
+ /* Make a virtual device for managing core related things
+ in sysfs */
kgsl_driver.virtdev.class = kgsl_driver.class;
dev_set_name(&kgsl_driver.virtdev, "kgsl");
result = device_register(&kgsl_driver.virtdev);
@@ -2612,7 +2573,7 @@
goto err;
}
-
+ /* Make kobjects in the virtual device for storing statistics */
kgsl_driver.ptkobj =
kobject_create_and_add("pagetables",
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 7e62475..3935164 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -22,24 +22,33 @@
#include <linux/cdev.h>
#include <linux/regulator/consumer.h>
#include <linux/mm.h>
-#include <linux/ion.h>
+
+#include <mach/kgsl.h>
#define KGSL_NAME "kgsl"
+/* The number of memstore arrays limits the number of contexts allowed.
+ * If more contexts are needed, increase the KGSL_MEMSTORE_SIZE multiplier.
+ */
#define KGSL_MEMSTORE_SIZE ((int)(PAGE_SIZE * 2))
#define KGSL_MEMSTORE_GLOBAL (0)
#define KGSL_MEMSTORE_MAX (KGSL_MEMSTORE_SIZE / \
sizeof(struct kgsl_devmemstore) - 1)
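A rough worked example of the sizing above, assuming a 4 KB page and a hypothetical 32-byte struct kgsl_devmemstore (the sizeof value is an assumption, not taken from this patch):

/* KGSL_MEMSTORE_SIZE = PAGE_SIZE * 2 = 8192 bytes  (assumed 4 KB pages)
 * entries            = 8192 / 32     = 256          (assumed 32-byte entry)
 * KGSL_MEMSTORE_MAX  = 256 - 1       = 255
 * Entry 0 is KGSL_MEMSTORE_GLOBAL, so raising the PAGE_SIZE multiplier is
 * what grows the number of available context slots.
 */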
+/* Timestamp window used to detect rollovers (half of integer range) */
#define KGSL_TIMESTAMP_WINDOW 0x80000000
+/* Cache coherency ops */
#define DRM_KGSL_GEM_CACHE_OP_TO_DEV 0x0001
#define DRM_KGSL_GEM_CACHE_OP_FROM_DEV 0x0002
+/* The size of each entry in a page table */
#define KGSL_PAGETABLE_ENTRY_SIZE 4
+/* Pagetable Virtual Address base */
#define KGSL_PAGETABLE_BASE 0x10000000
+/* Extra accounting entries needed in the pagetable */
#define KGSL_PT_EXTRA_ENTRIES 16
#define KGSL_PAGETABLE_ENTRIES(_sz) (((_sz) >> PAGE_SHIFT) + \
@@ -51,36 +60,41 @@
#define KGSL_PAGETABLE_COUNT 1
#endif
+/* Casting using container_of() for structures that kgsl owns. */
#define KGSL_CONTAINER_OF(ptr, type, member) \
container_of(ptr, type, member)
+/* A macro for memory statistics - add the new size to the stat and if
+   the statistic is greater than _max, set _max
+*/
#define KGSL_STATS_ADD(_size, _stat, _max) \
do { _stat += (_size); if (_stat > _max) _max = _stat; } while (0)
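A tiny usage sketch for the macro above (the function and variable names are illustrative only):

static void kgsl_stats_example(void)
{
	unsigned int mapped = 0, mapped_max = 0;

	KGSL_STATS_ADD(4096, mapped, mapped_max); /* mapped = 4096,  mapped_max = 4096  */
	KGSL_STATS_ADD(8192, mapped, mapped_max); /* mapped = 12288, mapped_max = 12288 */
}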
struct kgsl_device;
+struct kgsl_context;
struct kgsl_driver {
struct cdev cdev;
dev_t major;
struct class *class;
-
+ /* Virtual device for managing the core */
struct device virtdev;
-
+ /* Kobjects for storing pagetable and process statistics */
struct kobject *ptkobj;
struct kobject *prockobj;
struct kgsl_device *devp[KGSL_DEVICE_MAX];
-
+	/* Global list of open processes */
struct list_head process_list;
-
+ /* Global list of pagetables */
struct list_head pagetable_list;
-
+ /* Spinlock for accessing the pagetable list */
spinlock_t ptlock;
-
+ /* Mutex for accessing the process list */
struct mutex process_mutex;
-
+ /* Mutex for protecting the device list */
struct mutex devlock;
void *ptpool;
@@ -90,14 +104,10 @@
unsigned int vmalloc_max;
unsigned int page_alloc;
unsigned int page_alloc_max;
- unsigned int page_alloc_kernel;
unsigned int coherent;
unsigned int coherent_max;
unsigned int mapped;
unsigned int mapped_max;
- unsigned int pre_alloc;
- unsigned int pre_alloc_max;
- unsigned int pre_alloc_kernel;
unsigned int histogram[16];
} stats;
};
@@ -115,45 +125,36 @@
int (*map_kernel_mem)(struct kgsl_memdesc *);
};
+/* Internal definitions for memdesc->priv */
#define KGSL_MEMDESC_GUARD_PAGE BIT(0)
+/* Set if the memdesc is mapped into all pagetables */
+#define KGSL_MEMDESC_GLOBAL BIT(1)
+/* shared memory allocation */
struct kgsl_memdesc {
struct kgsl_pagetable *pagetable;
void *hostptr;
unsigned int gpuaddr;
unsigned int physaddr;
unsigned int size;
- unsigned int priv;
+ unsigned int priv; /* Internal flags and settings */
struct scatterlist *sg;
- unsigned int sglen;
+ unsigned int sglen; /* Active entries in the sglist */
+ unsigned int sglen_alloc; /* Allocated entries in the sglist */
struct kgsl_memdesc_ops *ops;
- int flags;
- struct ion_handle* handle;
- struct kgsl_process_private *private;
+ unsigned int flags; /* Flags set from userspace */
};
-#if 0
-#define KGSL_MEM_ENTRY_KERNEL 0
-#define KGSL_MEM_ENTRY_PMEM 1
-#define KGSL_MEM_ENTRY_ASHMEM 2
-#define KGSL_MEM_ENTRY_USER 3
-#define KGSL_MEM_ENTRY_ION 4
-#define KGSL_MEM_ENTRY_PAGE_ALLOC 5
-#define KGSL_MEM_ENTRY_PRE_ALLOC 6
-#define KGSL_MEM_ENTRY_MAX 7
-#else
-enum {
- KGSL_MEM_ENTRY_KERNEL = 0,
- KGSL_MEM_ENTRY_PMEM,
- KGSL_MEM_ENTRY_ASHMEM,
- KGSL_MEM_ENTRY_USER,
- KGSL_MEM_ENTRY_ION,
- KGSL_MEM_ENTRY_PAGE_ALLOC,
- KGSL_MEM_ENTRY_PRE_ALLOC,
- KGSL_MEM_ENTRY_MAX,
-};
-#endif
+/* List of different memory entry types */
+#define KGSL_MEM_ENTRY_KERNEL 0
+#define KGSL_MEM_ENTRY_PMEM 1
+#define KGSL_MEM_ENTRY_ASHMEM 2
+#define KGSL_MEM_ENTRY_USER 3
+#define KGSL_MEM_ENTRY_ION 4
+#define KGSL_MEM_ENTRY_MAX 5
+
+/* List of flags */
#define KGSL_MEM_ENTRY_FROZEN (1 << 0)
@@ -165,6 +166,8 @@
void *priv_data;
struct rb_node node;
unsigned int context_id;
+ /* back pointer to private structure under whose context this
+ * allocation is made */
struct kgsl_process_private *priv;
};
@@ -175,14 +178,17 @@
#endif
void kgsl_mem_entry_destroy(struct kref *kref);
+int kgsl_postmortem_dump(struct kgsl_device *device, int manual);
-struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase,
- unsigned int gpuaddr, unsigned int size);
+struct kgsl_mem_entry *kgsl_get_mem_entry(struct kgsl_device *device,
+ unsigned int ptbase, unsigned int gpuaddr, unsigned int size);
struct kgsl_mem_entry *kgsl_sharedmem_find_region(
struct kgsl_process_private *private, unsigned int gpuaddr,
size_t size);
+void kgsl_get_memory_usage(char *str, size_t len, unsigned int memflags);
+
int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv,
void *owner);
@@ -190,6 +196,9 @@
void kgsl_cancel_events(struct kgsl_device *device,
void *owner);
+void kgsl_cancel_events_ctxt(struct kgsl_device *device,
+ struct kgsl_context *context);
+
extern const struct dev_pm_ops kgsl_pm_ops;
struct early_suspend;
@@ -245,14 +254,19 @@
static inline int timestamp_cmp(unsigned int a, unsigned int b)
{
-
+ /* check for equal */
if (a == b)
return 0;
-
+ /* check for greater-than for non-rollover case */
if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW))
return 1;
+ /* check for greater-than for rollover case
+ * note that <= is required to ensure that consistent
+ * results are returned for values whose difference is
+ * equal to the window size
+ */
a += KGSL_TIMESTAMP_WINDOW;
b += KGSL_TIMESTAMP_WINDOW;
return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1;
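A quick illustration of the rollover branch above, with example values (not taken from the driver):

/* With KGSL_TIMESTAMP_WINDOW = 0x80000000:
 *   a = 0x00000005  (counter has just wrapped)
 *   b = 0xfffffff0  (issued shortly before the wrap)
 * The non-rollover test fails since a < b, so both values are shifted by
 * the window: 0x80000005 vs 0x7ffffff0.  Now a > b and a - b = 0x15 is
 * within the window, so timestamp_cmp() returns 1 and the wrapped value
 * is correctly treated as the newer timestamp.
 */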
@@ -270,4 +284,4 @@
kref_put(&entry->refcount, kgsl_mem_entry_destroy);
}
-#endif
+#endif /* __KGSL_H */
diff --git a/drivers/gpu/msm/kgsl_cffdump.c b/drivers/gpu/msm/kgsl_cffdump.c
new file mode 100644
index 0000000..e06c94d
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_cffdump.c
@@ -0,0 +1,591 @@
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* #define DEBUG */
+#define ALIGN_CPU
+
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/relay.h>
+#include <linux/slab.h>
+#include <linux/time.h>
+#include <linux/sched.h>
+#include <mach/socinfo.h>
+
+#include "kgsl.h"
+#include "kgsl_cffdump.h"
+#include "kgsl_debugfs.h"
+#include "kgsl_log.h"
+#include "kgsl_sharedmem.h"
+#include "adreno_pm4types.h"
+
+static struct rchan *chan;
+static struct dentry *dir;
+static int suspended;
+static size_t dropped;
+static size_t subbuf_size = 256*1024;
+static size_t n_subbufs = 64;
+
+/* forward declarations */
+static void destroy_channel(void);
+static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs);
+
+static spinlock_t cffdump_lock;
+static ulong serial_nr;
+static ulong total_bytes;
+static ulong total_syncmem;
+static long last_sec;
+
+#define MEMBUF_SIZE 64
+
+#define CFF_OP_WRITE_REG 0x00000002
+struct cff_op_write_reg {
+ unsigned char op;
+ uint addr;
+ uint value;
+} __packed;
+
+#define CFF_OP_POLL_REG 0x00000004
+struct cff_op_poll_reg {
+ unsigned char op;
+ uint addr;
+ uint value;
+ uint mask;
+} __packed;
+
+#define CFF_OP_WAIT_IRQ 0x00000005
+struct cff_op_wait_irq {
+ unsigned char op;
+} __packed;
+
+#define CFF_OP_RMW 0x0000000a
+
+#define CFF_OP_WRITE_MEM 0x0000000b
+struct cff_op_write_mem {
+ unsigned char op;
+ uint addr;
+ uint value;
+} __packed;
+
+#define CFF_OP_WRITE_MEMBUF 0x0000000c
+struct cff_op_write_membuf {
+ unsigned char op;
+ uint addr;
+ ushort count;
+ uint buffer[MEMBUF_SIZE];
+} __packed;
+
+#define CFF_OP_MEMORY_BASE 0x0000000d
+struct cff_op_memory_base {
+ unsigned char op;
+ uint base;
+ uint size;
+ uint gmemsize;
+} __packed;
+
+#define CFF_OP_HANG 0x0000000e
+struct cff_op_hang {
+ unsigned char op;
+} __packed;
+
+#define CFF_OP_EOF 0xffffffff
+struct cff_op_eof {
+ unsigned char op;
+} __packed;
+
+#define CFF_OP_VERIFY_MEM_FILE 0x00000007
+#define CFF_OP_WRITE_SURFACE_PARAMS 0x00000011
+struct cff_op_user_event {
+ unsigned char op;
+ unsigned int op1;
+ unsigned int op2;
+ unsigned int op3;
+ unsigned int op4;
+ unsigned int op5;
+} __packed;
+
+
+static void b64_encodeblock(unsigned char in[3], unsigned char out[4], int len)
+{
+ static const char tob64[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmno"
+ "pqrstuvwxyz0123456789+/";
+
+ out[0] = tob64[in[0] >> 2];
+ out[1] = tob64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
+ out[2] = (unsigned char) (len > 1 ? tob64[((in[1] & 0x0f) << 2)
+ | ((in[2] & 0xc0) >> 6)] : '=');
+ out[3] = (unsigned char) (len > 2 ? tob64[in[2] & 0x3f] : '=');
+}
+
+static void b64_encode(const unsigned char *in_buf, int in_size,
+ unsigned char *out_buf, int out_bufsize, int *out_size)
+{
+ unsigned char in[3], out[4];
+ int i, len;
+
+ *out_size = 0;
+ while (in_size > 0) {
+ len = 0;
+ for (i = 0; i < 3; ++i) {
+ if (in_size-- > 0) {
+ in[i] = *in_buf++;
+ ++len;
+ } else
+ in[i] = 0;
+ }
+ if (len) {
+ b64_encodeblock(in, out, len);
+ if (out_bufsize < 4) {
+ pr_warn("kgsl: cffdump: %s: out of buffer\n",
+ __func__);
+ return;
+ }
+ for (i = 0; i < 4; ++i)
+ *out_buf++ = out[i];
+ *out_size += 4;
+ out_bufsize -= 4;
+ }
+ }
+}
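A worked example of the encoder above (byte values chosen arbitrarily):

/* Encoding the three bytes 'K' 'G' 'S' (0x4b 0x47 0x53):
 *   bits 010010 110100 011101 010011 -> indices 18, 52, 29, 19
 *   -> the output "S0dT".
 * A trailing group of one or two input bytes is padded with '=' as in
 * standard base64.
 */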
+
+#define KLOG_TMPBUF_SIZE (1024)
+static void klog_printk(const char *fmt, ...)
+{
+ /* per-cpu klog formatting temporary buffer */
+ static char klog_buf[NR_CPUS][KLOG_TMPBUF_SIZE];
+
+ va_list args;
+ int len;
+ char *cbuf;
+ unsigned long flags;
+
+ local_irq_save(flags);
+ cbuf = klog_buf[smp_processor_id()];
+ va_start(args, fmt);
+ len = vsnprintf(cbuf, KLOG_TMPBUF_SIZE, fmt, args);
+ total_bytes += len;
+ va_end(args);
+ relay_write(chan, cbuf, len);
+ local_irq_restore(flags);
+}
+
+static struct cff_op_write_membuf cff_op_write_membuf;
+static void cffdump_membuf(int id, unsigned char *out_buf, int out_bufsize)
+{
+ void *data;
+ int len, out_size;
+ struct cff_op_write_mem cff_op_write_mem;
+
+ uint addr = cff_op_write_membuf.addr
+ - sizeof(uint)*cff_op_write_membuf.count;
+
+ if (!cff_op_write_membuf.count) {
+ pr_warn("kgsl: cffdump: membuf: count == 0, skipping");
+ return;
+ }
+
+ if (cff_op_write_membuf.count != 1) {
+ cff_op_write_membuf.op = CFF_OP_WRITE_MEMBUF;
+ cff_op_write_membuf.addr = addr;
+ len = sizeof(cff_op_write_membuf) -
+ sizeof(uint)*(MEMBUF_SIZE - cff_op_write_membuf.count);
+ data = &cff_op_write_membuf;
+ } else {
+ cff_op_write_mem.op = CFF_OP_WRITE_MEM;
+ cff_op_write_mem.addr = addr;
+ cff_op_write_mem.value = cff_op_write_membuf.buffer[0];
+ data = &cff_op_write_mem;
+ len = sizeof(cff_op_write_mem);
+ }
+ b64_encode(data, len, out_buf, out_bufsize, &out_size);
+ out_buf[out_size] = 0;
+ klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf);
+ cff_op_write_membuf.count = 0;
+ cff_op_write_membuf.addr = 0;
+}
+
+static void cffdump_printline(int id, uint opcode, uint op1, uint op2,
+ uint op3, uint op4, uint op5)
+{
+ struct cff_op_write_reg cff_op_write_reg;
+ struct cff_op_poll_reg cff_op_poll_reg;
+ struct cff_op_wait_irq cff_op_wait_irq;
+ struct cff_op_memory_base cff_op_memory_base;
+ struct cff_op_hang cff_op_hang;
+ struct cff_op_eof cff_op_eof;
+ struct cff_op_user_event cff_op_user_event;
+ unsigned char out_buf[sizeof(cff_op_write_membuf)/3*4 + 16];
+ void *data;
+ int len = 0, out_size;
+ long cur_secs;
+
+ spin_lock(&cffdump_lock);
+ if (opcode == CFF_OP_WRITE_MEM) {
+ if ((cff_op_write_membuf.addr != op1 &&
+ cff_op_write_membuf.count)
+ || (cff_op_write_membuf.count == MEMBUF_SIZE))
+ cffdump_membuf(id, out_buf, sizeof(out_buf));
+
+ cff_op_write_membuf.buffer[cff_op_write_membuf.count++] = op2;
+ cff_op_write_membuf.addr = op1 + sizeof(uint);
+ spin_unlock(&cffdump_lock);
+ return;
+ } else if (cff_op_write_membuf.count)
+ cffdump_membuf(id, out_buf, sizeof(out_buf));
+ spin_unlock(&cffdump_lock);
+
+ switch (opcode) {
+ case CFF_OP_WRITE_REG:
+ cff_op_write_reg.op = opcode;
+ cff_op_write_reg.addr = op1;
+ cff_op_write_reg.value = op2;
+ data = &cff_op_write_reg;
+ len = sizeof(cff_op_write_reg);
+ break;
+
+ case CFF_OP_POLL_REG:
+ cff_op_poll_reg.op = opcode;
+ cff_op_poll_reg.addr = op1;
+ cff_op_poll_reg.value = op2;
+ cff_op_poll_reg.mask = op3;
+ data = &cff_op_poll_reg;
+ len = sizeof(cff_op_poll_reg);
+ break;
+
+ case CFF_OP_WAIT_IRQ:
+ cff_op_wait_irq.op = opcode;
+ data = &cff_op_wait_irq;
+ len = sizeof(cff_op_wait_irq);
+ break;
+
+ case CFF_OP_MEMORY_BASE:
+ cff_op_memory_base.op = opcode;
+ cff_op_memory_base.base = op1;
+ cff_op_memory_base.size = op2;
+ cff_op_memory_base.gmemsize = op3;
+ data = &cff_op_memory_base;
+ len = sizeof(cff_op_memory_base);
+ break;
+
+ case CFF_OP_HANG:
+ cff_op_hang.op = opcode;
+ data = &cff_op_hang;
+ len = sizeof(cff_op_hang);
+ break;
+
+ case CFF_OP_EOF:
+ cff_op_eof.op = opcode;
+ data = &cff_op_eof;
+ len = sizeof(cff_op_eof);
+ break;
+
+ case CFF_OP_WRITE_SURFACE_PARAMS:
+ case CFF_OP_VERIFY_MEM_FILE:
+ cff_op_user_event.op = opcode;
+ cff_op_user_event.op1 = op1;
+ cff_op_user_event.op2 = op2;
+ cff_op_user_event.op3 = op3;
+ cff_op_user_event.op4 = op4;
+ cff_op_user_event.op5 = op5;
+ data = &cff_op_user_event;
+ len = sizeof(cff_op_user_event);
+ break;
+ }
+
+ if (len) {
+ b64_encode(data, len, out_buf, sizeof(out_buf), &out_size);
+ out_buf[out_size] = 0;
+ klog_printk("%ld:%d;%s\n", ++serial_nr, id, out_buf);
+ } else
+ pr_warn("kgsl: cffdump: unhandled opcode: %d\n", opcode);
+
+ cur_secs = get_seconds();
+ if ((cur_secs - last_sec) > 10 || (last_sec - cur_secs) > 10) {
+ pr_info("kgsl: cffdump: total [bytes:%lu kB, syncmem:%lu kB], "
+ "seq#: %lu\n", total_bytes/1024, total_syncmem/1024,
+ serial_nr);
+ last_sec = cur_secs;
+ }
+}
+
+void kgsl_cffdump_init()
+{
+ struct dentry *debugfs_dir = kgsl_get_debugfs_dir();
+
+#ifdef ALIGN_CPU
+ cpumask_t mask;
+
+ cpumask_clear(&mask);
+ cpumask_set_cpu(0, &mask);
+ sched_setaffinity(0, &mask);
+#endif
+ if (!debugfs_dir || IS_ERR(debugfs_dir)) {
+ KGSL_CORE_ERR("Debugfs directory is bad\n");
+ return;
+ }
+
+ kgsl_cff_dump_enable = 1;
+
+ spin_lock_init(&cffdump_lock);
+
+ dir = debugfs_create_dir("cff", debugfs_dir);
+ if (!dir) {
+ KGSL_CORE_ERR("debugfs_create_dir failed\n");
+ return;
+ }
+
+ chan = create_channel(subbuf_size, n_subbufs);
+}
+
+void kgsl_cffdump_destroy()
+{
+ if (chan)
+ relay_flush(chan);
+ destroy_channel();
+ if (dir)
+ debugfs_remove(dir);
+}
+
+void kgsl_cffdump_open(enum kgsl_deviceid device_id)
+{
+ kgsl_cffdump_memory_base(device_id, KGSL_PAGETABLE_BASE,
+ kgsl_mmu_get_ptsize(), SZ_256K);
+}
+
+void kgsl_cffdump_memory_base(enum kgsl_deviceid device_id, unsigned int base,
+ unsigned int range, unsigned gmemsize)
+{
+ cffdump_printline(device_id, CFF_OP_MEMORY_BASE, base,
+ range, gmemsize, 0, 0);
+}
+
+void kgsl_cffdump_hang(enum kgsl_deviceid device_id)
+{
+ cffdump_printline(device_id, CFF_OP_HANG, 0, 0, 0, 0, 0);
+}
+
+void kgsl_cffdump_close(enum kgsl_deviceid device_id)
+{
+ cffdump_printline(device_id, CFF_OP_EOF, 0, 0, 0, 0, 0);
+}
+
+void kgsl_cffdump_user_event(unsigned int cff_opcode, unsigned int op1,
+ unsigned int op2, unsigned int op3,
+ unsigned int op4, unsigned int op5)
+{
+ cffdump_printline(-1, cff_opcode, op1, op2, op3, op4, op5);
+}
+
+void kgsl_cffdump_syncmem(struct kgsl_device_private *dev_priv,
+ const struct kgsl_memdesc *memdesc, uint gpuaddr, uint sizebytes,
+ bool clean_cache)
+{
+ const void *src;
+
+ if (!kgsl_cff_dump_enable)
+ return;
+
+ total_syncmem += sizebytes;
+
+ if (memdesc == NULL) {
+ struct kgsl_mem_entry *entry;
+ spin_lock(&dev_priv->process_priv->mem_lock);
+ entry = kgsl_sharedmem_find_region(dev_priv->process_priv,
+ gpuaddr, sizebytes);
+ spin_unlock(&dev_priv->process_priv->mem_lock);
+ if (entry == NULL) {
+ KGSL_CORE_ERR("did not find mapping "
+ "for gpuaddr: 0x%08x\n", gpuaddr);
+ return;
+ }
+ memdesc = &entry->memdesc;
+ }
+ src = (uint *)kgsl_gpuaddr_to_vaddr(memdesc, gpuaddr);
+ if (memdesc->hostptr == NULL) {
+ KGSL_CORE_ERR("no kernel mapping for "
+ "gpuaddr: 0x%08x, m->host: 0x%p, phys: 0x%08x\n",
+ gpuaddr, memdesc->hostptr, memdesc->physaddr);
+ return;
+ }
+
+ if (clean_cache) {
+ /* Ensure that this memory region is not read from the
+ * cache but fetched fresh */
+
+ mb();
+
+ kgsl_cache_range_op((struct kgsl_memdesc *)memdesc,
+ KGSL_CACHE_OP_INV);
+ }
+
+ while (sizebytes > 3) {
+ cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src,
+ 0, 0, 0);
+ gpuaddr += 4;
+ src += 4;
+ sizebytes -= 4;
+ }
+ if (sizebytes > 0)
+ cffdump_printline(-1, CFF_OP_WRITE_MEM, gpuaddr, *(uint *)src,
+ 0, 0, 0);
+}
+
+void kgsl_cffdump_setmem(uint addr, uint value, uint sizebytes)
+{
+ if (!kgsl_cff_dump_enable)
+ return;
+
+ while (sizebytes > 3) {
+ /* Use 32bit memory writes as long as there's at least
+ * 4 bytes left */
+ cffdump_printline(-1, CFF_OP_WRITE_MEM, addr, value,
+ 0, 0, 0);
+ addr += 4;
+ sizebytes -= 4;
+ }
+ if (sizebytes > 0)
+ cffdump_printline(-1, CFF_OP_WRITE_MEM, addr, value,
+ 0, 0, 0);
+}
+
+void kgsl_cffdump_regwrite(enum kgsl_deviceid device_id, uint addr,
+ uint value)
+{
+ if (!kgsl_cff_dump_enable)
+ return;
+
+ cffdump_printline(device_id, CFF_OP_WRITE_REG, addr, value,
+ 0, 0, 0);
+}
+
+void kgsl_cffdump_regpoll(enum kgsl_deviceid device_id, uint addr,
+ uint value, uint mask)
+{
+ if (!kgsl_cff_dump_enable)
+ return;
+
+ cffdump_printline(device_id, CFF_OP_POLL_REG, addr, value,
+ mask, 0, 0);
+}
+
+void kgsl_cffdump_slavewrite(uint addr, uint value)
+{
+ if (!kgsl_cff_dump_enable)
+ return;
+
+ cffdump_printline(-1, CFF_OP_WRITE_REG, addr, value, 0, 0, 0);
+}
+
+int kgsl_cffdump_waitirq(void)
+{
+ if (!kgsl_cff_dump_enable)
+ return 0;
+
+ cffdump_printline(-1, CFF_OP_WAIT_IRQ, 0, 0, 0, 0, 0);
+
+ return 1;
+}
+EXPORT_SYMBOL(kgsl_cffdump_waitirq);
+
+static int subbuf_start_handler(struct rchan_buf *buf,
+ void *subbuf, void *prev_subbuf, uint prev_padding)
+{
+ pr_debug("kgsl: cffdump: subbuf_start_handler(subbuf=%p, prev_subbuf"
+ "=%p, prev_padding=%08x)\n", subbuf, prev_subbuf, prev_padding);
+
+ if (relay_buf_full(buf)) {
+ if (!suspended) {
+ suspended = 1;
+ pr_warn("kgsl: cffdump: relay: cpu %d buffer full!!!\n",
+ smp_processor_id());
+ }
+ dropped++;
+ return 0;
+ } else if (suspended) {
+ suspended = 0;
+ pr_warn("kgsl: cffdump: relay: cpu %d buffer no longer full.\n",
+ smp_processor_id());
+ }
+
+ subbuf_start_reserve(buf, 0);
+ return 1;
+}
+
+static struct dentry *create_buf_file_handler(const char *filename,
+ struct dentry *parent, int mode, struct rchan_buf *buf,
+ int *is_global)
+{
+ return debugfs_create_file(filename, mode, parent, buf,
+ &relay_file_operations);
+}
+
+/*
+ * file_remove() default callback. Removes relay file in debugfs.
+ */
+static int remove_buf_file_handler(struct dentry *dentry)
+{
+ pr_info("kgsl: cffdump: %s()\n", __func__);
+ debugfs_remove(dentry);
+ return 0;
+}
+
+/*
+ * relay callbacks
+ */
+static struct rchan_callbacks relay_callbacks = {
+ .subbuf_start = subbuf_start_handler,
+ .create_buf_file = create_buf_file_handler,
+ .remove_buf_file = remove_buf_file_handler,
+};
+
+/**
+ * create_channel - creates channel /debug/kgsl/cff/cpuXXX
+ *
+ * Creates channel along with associated produced/consumed control files
+ *
+ * Returns channel on success, NULL otherwise
+ */
+static struct rchan *create_channel(unsigned subbuf_size, unsigned n_subbufs)
+{
+ struct rchan *chan;
+
+ pr_info("kgsl: cffdump: relay: create_channel: subbuf_size %u, "
+ "n_subbufs %u, dir 0x%p\n", subbuf_size, n_subbufs, dir);
+
+ chan = relay_open("cpu", dir, subbuf_size,
+ n_subbufs, &relay_callbacks, NULL);
+ if (!chan) {
+ KGSL_CORE_ERR("relay_open failed\n");
+ return NULL;
+ }
+
+ suspended = 0;
+ dropped = 0;
+
+ return chan;
+}
+
+/**
+ * destroy_channel - destroys channel /debug/kgsl/cff/cpuXXX
+ *
+ * Destroys channel along with associated produced/consumed control files
+ */
+static void destroy_channel(void)
+{
+ pr_info("kgsl: cffdump: relay: destroy_channel\n");
+ if (chan) {
+ relay_close(chan);
+ chan = NULL;
+ }
+}
+
diff --git a/drivers/gpu/msm/kgsl_cffdump.h b/drivers/gpu/msm/kgsl_cffdump.h
index cea8ea0..2733cc3 100644
--- a/drivers/gpu/msm/kgsl_cffdump.h
+++ b/drivers/gpu/msm/kgsl_cffdump.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -64,6 +64,6 @@
#define kgsl_cffdump_user_event(cff_opcode, op1, op2, op3, op4, op5) \
(void)param
-#endif
+#endif /* CONFIG_MSM_KGSL_CFF_DUMP */
-#endif
+#endif /* __KGSL_CFFDUMP_H */
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index 68fee6d..b41bd6b 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2008-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -16,11 +16,86 @@
#include "kgsl.h"
#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+/* The default log level is error for everything */
#define KGSL_LOG_LEVEL_DEFAULT 3
#define KGSL_LOG_LEVEL_MAX 7
struct dentry *kgsl_debugfs_dir;
+static struct dentry *pm_d_debugfs;
+struct dentry *proc_d_debugfs;
+
+static int pm_dump_set(void *data, u64 val)
+{
+ struct kgsl_device *device = data;
+
+ if (val) {
+ mutex_lock(&device->mutex);
+ kgsl_postmortem_dump(device, 1);
+ mutex_unlock(&device->mutex);
+ }
+
+ return 0;
+}
+DEFINE_SIMPLE_ATTRIBUTE(pm_dump_fops,
+ NULL,
+ pm_dump_set, "%llu\n");
+
+static int pm_regs_enabled_set(void *data, u64 val)
+{
+ struct kgsl_device *device = data;
+ device->pm_regs_enabled = val ? 1 : 0;
+ return 0;
+}
+
+static int pm_regs_enabled_get(void *data, u64 *val)
+{
+ struct kgsl_device *device = data;
+ *val = device->pm_regs_enabled;
+ return 0;
+}
+
+static int pm_ib_enabled_set(void *data, u64 val)
+{
+ struct kgsl_device *device = data;
+ device->pm_ib_enabled = val ? 1 : 0;
+ return 0;
+}
+
+static int pm_ib_enabled_get(void *data, u64 *val)
+{
+ struct kgsl_device *device = data;
+ *val = device->pm_ib_enabled;
+ return 0;
+}
+
+static int pm_enabled_set(void *data, u64 val)
+{
+ struct kgsl_device *device = data;
+ device->pm_dump_enable = val;
+ return 0;
+}
+
+static int pm_enabled_get(void *data, u64 *val)
+{
+ struct kgsl_device *device = data;
+ *val = device->pm_dump_enable;
+ return 0;
+}
+
+
+DEFINE_SIMPLE_ATTRIBUTE(pm_regs_enabled_fops,
+ pm_regs_enabled_get,
+ pm_regs_enabled_set, "%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(pm_ib_enabled_fops,
+ pm_ib_enabled_get,
+ pm_ib_enabled_set, "%llu\n");
+
+DEFINE_SIMPLE_ATTRIBUTE(pm_enabled_fops,
+ pm_enabled_get,
+ pm_enabled_set, "%llu\n");
static inline int kgsl_log_set(unsigned int *log_val, void *data, u64 val)
{
@@ -48,6 +123,7 @@
KGSL_DEBUGFS_LOG(ctxt_log);
KGSL_DEBUGFS_LOG(mem_log);
KGSL_DEBUGFS_LOG(pwr_log);
+KGSL_DEBUGFS_LOG(ft_log);
void kgsl_device_debugfs_init(struct kgsl_device *device)
{
@@ -63,6 +139,7 @@
device->drv_log = KGSL_LOG_LEVEL_DEFAULT;
device->mem_log = KGSL_LOG_LEVEL_DEFAULT;
device->pwr_log = KGSL_LOG_LEVEL_DEFAULT;
+ device->ft_log = KGSL_LOG_LEVEL_DEFAULT;
debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device,
&cmd_log_fops);
@@ -74,11 +151,113 @@
&mem_log_fops);
debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device,
&pwr_log_fops);
+ debugfs_create_file("log_level_ft", 0644, device->d_debugfs, device,
+ &ft_log_fops);
+
+ /* Create postmortem dump control files */
+
+ pm_d_debugfs = debugfs_create_dir("postmortem", device->d_debugfs);
+
+ if (IS_ERR(pm_d_debugfs))
+ return;
+
+ debugfs_create_file("dump", 0600, pm_d_debugfs, device,
+ &pm_dump_fops);
+ debugfs_create_file("regs_enabled", 0644, pm_d_debugfs, device,
+ &pm_regs_enabled_fops);
+ debugfs_create_file("ib_enabled", 0644, pm_d_debugfs, device,
+ &pm_ib_enabled_fops);
+ device->pm_dump_enable = 0;
+ debugfs_create_file("enable", 0644, pm_d_debugfs, device,
+ &pm_enabled_fops);
+
+}
+
+static const char * const memtype_strings[] = {
+ "gpumem",
+ "pmem",
+ "ashmem",
+ "usermap",
+ "ion",
+};
+
+static const char *memtype_str(int memtype)
+{
+ if (memtype < ARRAY_SIZE(memtype_strings))
+ return memtype_strings[memtype];
+ return "unknown";
+}
+
+static char get_alignflag(const struct kgsl_memdesc *m)
+{
+ int align = kgsl_memdesc_get_align(m);
+ if (align >= ilog2(SZ_1M))
+ return 'L';
+ else if (align >= ilog2(SZ_64K))
+ return 'l';
+ return '-';
+}
+
+static int process_mem_print(struct seq_file *s, void *unused)
+{
+ struct kgsl_mem_entry *entry;
+ struct rb_node *node;
+ struct kgsl_process_private *private = s->private;
+ char flags[4];
+ char usage[16];
+
+ spin_lock(&private->mem_lock);
+ seq_printf(s, "%8s %8s %5s %10s %16s %5s\n",
+ "gpuaddr", "size", "flags", "type", "usage", "sglen");
+ for (node = rb_first(&private->mem_rb); node; node = rb_next(node)) {
+ struct kgsl_memdesc *m;
+
+ entry = rb_entry(node, struct kgsl_mem_entry, node);
+ m = &entry->memdesc;
+
+ flags[0] = m->priv & KGSL_MEMDESC_GLOBAL ? 'g' : '-';
+ flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
+ flags[2] = get_alignflag(m);
+ flags[3] = '\0';
+
+ kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
+
+ seq_printf(s, "%08x %8d %5s %10s %16s %5d\n",
+ m->gpuaddr, m->size, flags,
+ memtype_str(entry->memtype), usage, m->sglen);
+ }
+ spin_unlock(&private->mem_lock);
+ return 0;
+}
+
+static int process_mem_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, process_mem_print, inode->i_private);
+}
+
+static const struct file_operations process_mem_fops = {
+ .open = process_mem_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
+
+void
+kgsl_process_init_debugfs(struct kgsl_process_private *private)
+{
+ unsigned char name[16];
+
+ snprintf(name, sizeof(name), "%d", private->pid);
+
+ private->debug_root = debugfs_create_dir(name, proc_d_debugfs);
+ debugfs_create_file("mem", 0400, private->debug_root, private,
+ &process_mem_fops);
}
void kgsl_core_debugfs_init(void)
{
kgsl_debugfs_dir = debugfs_create_dir("kgsl", 0);
+ proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir);
}
void kgsl_core_debugfs_close(void)
diff --git a/drivers/gpu/msm/kgsl_debugfs.h b/drivers/gpu/msm/kgsl_debugfs.h
index 5e10988..ae5601f 100644
--- a/drivers/gpu/msm/kgsl_debugfs.h
+++ b/drivers/gpu/msm/kgsl_debugfs.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -15,6 +15,7 @@
#define _KGSL_DEBUGFS_H
struct kgsl_device;
+struct kgsl_process_private;
#ifdef CONFIG_DEBUG_FS
void kgsl_core_debugfs_init(void);
@@ -28,11 +29,16 @@
return kgsl_debugfs_dir;
}
+void kgsl_process_init_debugfs(struct kgsl_process_private *);
#else
static inline void kgsl_core_debugfs_init(void) { }
static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { }
static inline void kgsl_core_debugfs_close(void) { }
static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; }
+static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv)
+{
+}
#endif
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index df61717..b215d8c 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -22,14 +22,23 @@
#include "kgsl_pwrctrl.h"
#include "kgsl_log.h"
#include "kgsl_pwrscale.h"
+#include <linux/sync.h>
-#define KGSL_TIMEOUT_NONE 0
-#define KGSL_TIMEOUT_DEFAULT 0xFFFFFFFF
-#define KGSL_TIMEOUT_PART 2000
+#define KGSL_TIMEOUT_NONE 0
+#define KGSL_TIMEOUT_DEFAULT 0xFFFFFFFF
+#define KGSL_TIMEOUT_PART 50 /* 50 msec */
+#define KGSL_TIMEOUT_LONG_IB_DETECTION 2000 /* 2 sec*/
#define FIRST_TIMEOUT (HZ / 2)
+/* KGSL device state is initialized to INIT when platform_probe *
+ * successfully initializes the device. Once a device has been opened *
+ * (started) it becomes active. NAP implies that only low latency *
+ * resources (for now clocks on some platforms) are off. SLEEP implies *
+ * that the KGSL module believes a device is idle (has been inactive *
+ * past its timer) and all system resources are released. SUSPEND is *
+ * requested by the kernel and will be enforced upon all open devices. */
#define KGSL_STATE_NONE 0x00000000
#define KGSL_STATE_INIT 0x00000001
@@ -38,7 +47,7 @@
#define KGSL_STATE_SLEEP 0x00000008
#define KGSL_STATE_SUSPEND 0x00000010
#define KGSL_STATE_HUNG 0x00000020
-#define KGSL_STATE_DUMP_AND_RECOVER 0x00000040
+#define KGSL_STATE_DUMP_AND_FT 0x00000040
#define KGSL_STATE_SLUMBER 0x00000080
#define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK 0x1000000
@@ -50,8 +59,13 @@
struct kgsl_device_private;
struct kgsl_context;
struct kgsl_power_stats;
+struct kgsl_event;
struct kgsl_functable {
+ /* Mandatory functions - these functions must be implemented
+ by the client device. The driver will not check for a NULL
+ pointer before calling the hook.
+ */
void (*regread) (struct kgsl_device *device,
unsigned int offsetwords, unsigned int *value);
void (*regwrite) (struct kgsl_device *device,
@@ -84,6 +98,9 @@
void * (*snapshot)(struct kgsl_device *device, void *snapshot,
int *remain, int hang);
irqreturn_t (*irq_handler)(struct kgsl_device *device);
+ /* Optional functions - these functions are not mandatory. The
+ driver will check that the function pointer is not NULL before
+ calling the hook */
void (*setstate) (struct kgsl_device *device, unsigned int context_id,
uint32_t flags);
int (*drawctxt_create) (struct kgsl_device *device,
@@ -96,8 +113,12 @@
int (*setproperty) (struct kgsl_device *device,
enum kgsl_property_type type, void *value,
unsigned int sizebytes);
+ int (*postmortem_dump) (struct kgsl_device *device, int manual);
+ int (*next_event)(struct kgsl_device *device,
+ struct kgsl_event *event);
};
+/* MH register values */
struct kgsl_mh {
unsigned int mharb;
unsigned int mh_intf_cfg1;
@@ -113,12 +134,9 @@
void *priv;
struct list_head list;
void *owner;
+ unsigned int created;
};
-struct kgsl_gpubusy {
- s64 busy;
- s64 total;
-};
struct kgsl_device {
struct device *dev;
@@ -153,74 +171,94 @@
wait_queue_head_t wait_queue;
struct workqueue_struct *work_queue;
struct device *parentdev;
- struct completion recovery_gate;
+ struct completion ft_gate;
struct dentry *d_debugfs;
struct idr context_idr;
struct early_suspend display_off;
- void *snapshot;
- int snapshot_maxsize;
- int snapshot_size;
- u32 snapshot_timestamp;
- int snapshot_frozen;
- int snapshot_no_panic;
+ void *snapshot; /* Pointer to the snapshot memory region */
+ int snapshot_maxsize; /* Max size of the snapshot region */
+ int snapshot_size; /* Current size of the snapshot region */
+ u32 snapshot_timestamp; /* Timestamp of the last valid snapshot */
+ int snapshot_frozen; /* 1 if the snapshot output is frozen until
+ it gets read by the user. This avoids
+ losing the output on multiple hangs */
struct kobject snapshot_kobj;
+ /*
+ * List of GPU buffers that have been frozen in memory until they can be
+ * dumped
+ */
struct list_head snapshot_obj_list;
-
+ /* Logging levels */
int cmd_log;
int ctxt_log;
int drv_log;
int mem_log;
int pwr_log;
+ int ft_log;
+ int pm_dump_enable;
struct kgsl_pwrscale pwrscale;
struct kobject pwrscale_kobj;
struct pm_qos_request pm_qos_req_dma;
struct work_struct ts_expired_ws;
struct list_head events;
+ struct list_head events_pending_list;
s64 on_time;
-
- struct kgsl_gpubusy gputime;
- struct kgsl_gpubusy gputime_in_state[KGSL_MAX_PWRLEVELS];
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- struct kgsl_process_private *current_process_priv;
-#endif
-#if defined(CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE)
- int prev_pid;
-#endif
+ /* Postmortem Control switches */
+ int pm_regs_enabled;
+ int pm_ib_enabled;
};
-void kgsl_timestamp_expired(struct work_struct *work);
+void kgsl_process_events(struct work_struct *work);
+void kgsl_check_fences(struct work_struct *work);
#define KGSL_DEVICE_COMMON_INIT(_dev) \
.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
.suspend_gate = COMPLETION_INITIALIZER((_dev).suspend_gate),\
- .recovery_gate = COMPLETION_INITIALIZER((_dev).recovery_gate),\
+ .ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\
.ts_notifier_list = ATOMIC_NOTIFIER_INIT((_dev).ts_notifier_list),\
.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
kgsl_idle_check),\
.ts_expired_ws = __WORK_INITIALIZER((_dev).ts_expired_ws,\
- kgsl_timestamp_expired),\
+ kgsl_process_events),\
.context_idr = IDR_INIT((_dev).context_idr),\
.events = LIST_HEAD_INIT((_dev).events),\
+ .events_pending_list = LIST_HEAD_INIT((_dev).events_pending_list), \
.wait_queue = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).wait_queue),\
.mutex = __MUTEX_INITIALIZER((_dev).mutex),\
.state = KGSL_STATE_INIT,\
.ver_major = DRIVER_VERSION_MAJOR,\
.ver_minor = DRIVER_VERSION_MINOR
+
+/**
+ * struct kgsl_context - Master structure for a KGSL context object
+ * @refcount - kref object for reference counting the context
+ * @id - integer identifier for the context
+ * @dev_priv - pointer to the owning device instance
+ * @devctxt - pointer to the device specific context information
+ * @reset_status - status indicating whether a gpu reset occurred and whether
+ * this context was responsible for causing it
+ * @wait_on_invalid_ts - flag indicating if this context has tried to wait on a
+ * bad timestamp
+ * @timeline - sync timeline used to create fences that can be signaled when a
+ * sync_pt timestamp expires
+ * @events - list head of pending events for this context
+ * @events_list - list node for the list of all contexts that have pending events
+ */
struct kgsl_context {
struct kref refcount;
uint32_t id;
-
-
struct kgsl_device_private *dev_priv;
-
-
void *devctxt;
unsigned int reset_status;
+ bool wait_on_invalid_ts;
+ struct sync_timeline *timeline;
+ struct list_head events;
+ struct list_head events_list;
};
struct kgsl_process_private {
@@ -231,15 +269,12 @@
struct kgsl_pagetable *pagetable;
struct list_head list;
struct kobject kobj;
+ struct dentry *debug_root;
struct {
unsigned int cur;
unsigned int max;
} stats[KGSL_MEM_ENTRY_MAX];
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- struct kgsl_gpubusy gputime;
- struct kgsl_gpubusy gputime_in_state[KGSL_MAX_PWRLEVELS];
-#endif
};
struct kgsl_device_private {
@@ -262,12 +297,6 @@
priv->stats[type].max = priv->stats[type].cur;
}
-static inline void kgsl_process_sub_stats(struct kgsl_process_private *priv,
- unsigned int type, size_t size)
-{
- priv->stats[type].cur -= size;
-}
-
static inline void kgsl_regread(struct kgsl_device *device,
unsigned int offsetwords,
unsigned int *value)
@@ -353,6 +382,8 @@
struct kgsl_context *ctxt =
idr_find(&dev_priv->device->context_idr, id);
+ /* Make sure that the context belongs to the current instance so
+ that other processes can't guess context IDs and mess things up */
return (ctxt && ctxt->dev_priv == dev_priv) ? ctxt : NULL;
}
@@ -385,6 +416,15 @@
return pdev->dev.platform_data;
}
+/**
+ * kgsl_context_get - Get context reference count
+ * @context
+ *
+ * Asynchronous code that holds a pointer to a context
+ * must hold a reference count on it. The kgsl device
+ * mutex must be held while the context reference count
+ * is changed.
+ */
static inline void
kgsl_context_get(struct kgsl_context *context)
{
@@ -393,10 +433,34 @@
void kgsl_context_destroy(struct kref *kref);
+/**
+ * kgsl_context_put - Release a context reference
+ * @context
+ *
+ */
static inline void
kgsl_context_put(struct kgsl_context *context)
{
kref_put(&context->refcount, kgsl_context_destroy);
}
-#endif
+/**
+ * kgsl_active_count_put - Decrease the device active count
+ * @device: Pointer to a KGSL device
+ *
+ * Decrease the active count for the KGSL device and trigger the suspend_gate
+ * completion if it hits zero
+ */
+static inline void
+kgsl_active_count_put(struct kgsl_device *device)
+{
+ if (device->active_cnt == 1)
+ INIT_COMPLETION(device->suspend_gate);
+
+ device->active_cnt--;
+
+ if (device->active_cnt == 0)
+ complete(&device->suspend_gate);
+}
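A hedged sketch of the matching increment side (the helper name below is hypothetical; this patch only shows the put path):

/* Hypothetical counterpart, for illustration only.  Like the put helper
 * above, it assumes device->mutex is held by the caller. */
static inline void kgsl_active_count_get_sketch(struct kgsl_device *device)
{
	device->active_cnt++;
}

/* Typical pattern: raise the count before touching hardware so that a
 * concurrent suspend blocks on suspend_gate, then drop it when done. */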
+
+#endif /* __KGSL_DEVICE_H */
diff --git a/drivers/gpu/msm/kgsl_drm.c b/drivers/gpu/msm/kgsl_drm.c
new file mode 100644
index 0000000..2a5a5fa
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_drm.c
@@ -0,0 +1,1507 @@
+/* Copyright (c) 2009-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+/* Implements an interface between KGSL and the DRM subsystem. For now this
+ * is pretty simple, but it will take on more of the workload as time goes
+ * on
+ */
+#include "drmP.h"
+#include "drm.h"
+#include <linux/android_pmem.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "kgsl_drm.h"
+#include "kgsl_mmu.h"
+#include "kgsl_sharedmem.h"
+
+#define DRIVER_AUTHOR "Qualcomm"
+#define DRIVER_NAME "kgsl"
+#define DRIVER_DESC "KGSL DRM"
+#define DRIVER_DATE "20100127"
+
+#define DRIVER_MAJOR 2
+#define DRIVER_MINOR 1
+#define DRIVER_PATCHLEVEL 1
+
+#define DRM_KGSL_GEM_FLAG_MAPPED (1 << 0)
+
+#define ENTRY_EMPTY -1
+#define ENTRY_NEEDS_CLEANUP -2
+
+#define DRM_KGSL_NOT_INITED -1
+#define DRM_KGSL_INITED 1
+
+#define DRM_KGSL_NUM_FENCE_ENTRIES (DRM_KGSL_HANDLE_WAIT_ENTRIES << 2)
+#define DRM_KGSL_HANDLE_WAIT_ENTRIES 5
+
+/* Returns true if the memory type is in PMEM */
+
+#ifdef CONFIG_KERNEL_PMEM_SMI_REGION
+#define TYPE_IS_PMEM(_t) \
+ (((_t & DRM_KGSL_GEM_TYPE_MEM_MASK) == DRM_KGSL_GEM_TYPE_EBI) || \
+ ((_t & DRM_KGSL_GEM_TYPE_MEM_MASK) == DRM_KGSL_GEM_TYPE_SMI) || \
+ ((_t) & DRM_KGSL_GEM_TYPE_PMEM))
+#else
+#define TYPE_IS_PMEM(_t) \
+ (((_t & DRM_KGSL_GEM_TYPE_MEM_MASK) == DRM_KGSL_GEM_TYPE_EBI) || \
+ ((_t) & (DRM_KGSL_GEM_TYPE_PMEM | DRM_KGSL_GEM_PMEM_EBI)))
+#endif
+
+/* Returns true if the memory type is regular */
+
+#define TYPE_IS_MEM(_t) \
+ (((_t & DRM_KGSL_GEM_TYPE_MEM_MASK) == DRM_KGSL_GEM_TYPE_KMEM) || \
+ ((_t & DRM_KGSL_GEM_TYPE_MEM_MASK) == DRM_KGSL_GEM_TYPE_KMEM_NOCACHE) || \
+ ((_t) & DRM_KGSL_GEM_TYPE_MEM))
+
+#define TYPE_IS_FD(_t) ((_t) & DRM_KGSL_GEM_TYPE_FD_MASK)
+
+/* Returns true if KMEM region is uncached */
+
+#define IS_MEM_UNCACHED(_t) \
+ ((_t == DRM_KGSL_GEM_TYPE_KMEM_NOCACHE) || \
+ (_t == DRM_KGSL_GEM_TYPE_KMEM) || \
+ (TYPE_IS_MEM(_t) && (_t & DRM_KGSL_GEM_CACHE_WCOMBINE)))
+
+struct drm_kgsl_gem_object_wait_list_entry {
+ struct list_head list;
+ int pid;
+ int in_use;
+ wait_queue_head_t process_wait_q;
+};
+
+struct drm_kgsl_gem_object_fence {
+ int32_t fence_id;
+ unsigned int num_buffers;
+ int ts_valid;
+ unsigned int timestamp;
+ int ts_device;
+ int lockpid;
+ struct list_head buffers_in_fence;
+};
+
+struct drm_kgsl_gem_object_fence_list_entry {
+ struct list_head list;
+ int in_use;
+ struct drm_gem_object *gem_obj;
+};
+
+static int32_t fence_id = 0x1;
+
+static struct drm_kgsl_gem_object_fence
+ gem_buf_fence[DRM_KGSL_NUM_FENCE_ENTRIES];
+
+struct drm_kgsl_gem_object {
+ struct drm_gem_object *obj;
+ uint32_t type;
+ struct kgsl_memdesc memdesc;
+ struct kgsl_pagetable *pagetable;
+ uint64_t mmap_offset;
+ int bufcount;
+ int flags;
+ struct list_head list;
+ int active;
+
+ struct {
+ uint32_t offset;
+ uint32_t gpuaddr;
+ } bufs[DRM_KGSL_GEM_MAX_BUFFERS];
+
+ int bound;
+ int lockpid;
+ /* Put these here to avoid allocing all the time */
+ struct drm_kgsl_gem_object_wait_list_entry
+ wait_entries[DRM_KGSL_HANDLE_WAIT_ENTRIES];
+ /* Each object can only appear in a single fence */
+ struct drm_kgsl_gem_object_fence_list_entry
+ fence_entries[DRM_KGSL_NUM_FENCE_ENTRIES];
+
+ struct list_head wait_list;
+};
+
+static int kgsl_drm_inited = DRM_KGSL_NOT_INITED;
+
+/* This is a global list of all the memory currently mapped in the MMU */
+static struct list_head kgsl_mem_list;
+
+static void kgsl_gem_mem_flush(struct kgsl_memdesc *memdesc, int type, int op)
+{
+ int cacheop = 0;
+
+ switch (op) {
+ case DRM_KGSL_GEM_CACHE_OP_TO_DEV:
+ if (type & (DRM_KGSL_GEM_CACHE_WBACK |
+ DRM_KGSL_GEM_CACHE_WBACKWA))
+ cacheop = KGSL_CACHE_OP_CLEAN;
+
+ break;
+
+ case DRM_KGSL_GEM_CACHE_OP_FROM_DEV:
+ if (type & (DRM_KGSL_GEM_CACHE_WBACK |
+ DRM_KGSL_GEM_CACHE_WBACKWA |
+ DRM_KGSL_GEM_CACHE_WTHROUGH))
+ cacheop = KGSL_CACHE_OP_INV;
+ }
+
+ kgsl_cache_range_op(memdesc, cacheop);
+}
+
+/* TODO:
+ * Add vsync wait */
+
+static int kgsl_drm_load(struct drm_device *dev, unsigned long flags)
+{
+ return 0;
+}
+
+static int kgsl_drm_unload(struct drm_device *dev)
+{
+ return 0;
+}
+
+struct kgsl_drm_device_priv {
+ struct kgsl_device *device[KGSL_DEVICE_MAX];
+ struct kgsl_device_private *devpriv[KGSL_DEVICE_MAX];
+};
+
+void kgsl_drm_preclose(struct drm_device *dev, struct drm_file *file_priv)
+{
+}
+
+static int kgsl_drm_suspend(struct drm_device *dev, pm_message_t state)
+{
+ return 0;
+}
+
+static int kgsl_drm_resume(struct drm_device *dev)
+{
+ return 0;
+}
+
+static void
+kgsl_gem_free_mmap_offset(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_kgsl_gem_object *priv = obj->driver_private;
+ struct drm_map_list *list;
+
+ list = &obj->map_list;
+ drm_ht_remove_item(&mm->offset_hash, &list->hash);
+ if (list->file_offset_node) {
+ drm_mm_put_block(list->file_offset_node);
+ list->file_offset_node = NULL;
+ }
+
+ kfree(list->map);
+ list->map = NULL;
+
+ priv->mmap_offset = 0;
+}
+
+static int
+kgsl_gem_memory_allocated(struct drm_gem_object *obj)
+{
+ struct drm_kgsl_gem_object *priv = obj->driver_private;
+ return priv->memdesc.size ? 1 : 0;
+}
+
+static int
+kgsl_gem_alloc_memory(struct drm_gem_object *obj)
+{
+ struct drm_kgsl_gem_object *priv = obj->driver_private;
+ int index;
+ int result = 0;
+
+ /* Return if the memory is already allocated */
+
+ if (kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type))
+ return 0;
+
+ if (priv->pagetable == NULL) {
+ priv->pagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
+
+ if (priv->pagetable == NULL) {
+ DRM_ERROR("Unable to get the GPU MMU pagetable\n");
+ return -EINVAL;
+ }
+ }
+
+ /* Set the flags for the memdesc (probably 0, unless it is cached) */
+ priv->memdesc.priv = 0;
+
+ if (TYPE_IS_PMEM(priv->type)) {
+ if (priv->type == DRM_KGSL_GEM_TYPE_EBI ||
+ priv->type & DRM_KGSL_GEM_PMEM_EBI) {
+ result = kgsl_sharedmem_ebimem_user(
+ &priv->memdesc,
+ priv->pagetable,
+ obj->size * priv->bufcount);
+ if (result) {
+ DRM_ERROR(
+ "Unable to allocate PMEM memory\n");
+ return result;
+ }
+ }
+ else
+ return -EINVAL;
+
+ } else if (TYPE_IS_MEM(priv->type)) {
+
+ if (priv->type == DRM_KGSL_GEM_TYPE_KMEM ||
+ priv->type & DRM_KGSL_GEM_CACHE_MASK)
+ list_add(&priv->list, &kgsl_mem_list);
+
+ result = kgsl_sharedmem_page_alloc_user(&priv->memdesc,
+ priv->pagetable,
+ obj->size * priv->bufcount);
+
+ if (result != 0) {
+ DRM_ERROR(
+ "Unable to allocate Vmalloc user memory\n");
+ return result;
+ }
+ } else
+ return -EINVAL;
+
+ for (index = 0; index < priv->bufcount; index++) {
+ priv->bufs[index].offset = index * obj->size;
+ priv->bufs[index].gpuaddr =
+ priv->memdesc.gpuaddr +
+ priv->bufs[index].offset;
+ }
+ priv->flags |= DRM_KGSL_GEM_FLAG_MAPPED;
+
+ return 0;
+}
+
+static void
+kgsl_gem_free_memory(struct drm_gem_object *obj)
+{
+ struct drm_kgsl_gem_object *priv = obj->driver_private;
+
+ if (!kgsl_gem_memory_allocated(obj) || TYPE_IS_FD(priv->type))
+ return;
+
+ kgsl_gem_mem_flush(&priv->memdesc, priv->type,
+ DRM_KGSL_GEM_CACHE_OP_FROM_DEV);
+
+ kgsl_sharedmem_free(&priv->memdesc);
+
+ kgsl_mmu_putpagetable(priv->pagetable);
+ priv->pagetable = NULL;
+
+ if ((priv->type == DRM_KGSL_GEM_TYPE_KMEM) ||
+ (priv->type & DRM_KGSL_GEM_CACHE_MASK))
+ list_del(&priv->list);
+
+ priv->flags &= ~DRM_KGSL_GEM_FLAG_MAPPED;
+
+}
+
+int
+kgsl_gem_init_object(struct drm_gem_object *obj)
+{
+ struct drm_kgsl_gem_object *priv;
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (priv == NULL) {
+ DRM_ERROR("Unable to create GEM object\n");
+ return -ENOMEM;
+ }
+
+ obj->driver_private = priv;
+ priv->obj = obj;
+
+ return 0;
+}
+
+void
+kgsl_gem_free_object(struct drm_gem_object *obj)
+{
+ kgsl_gem_free_memory(obj);
+ kgsl_gem_free_mmap_offset(obj);
+ drm_gem_object_release(obj);
+ kfree(obj->driver_private);
+}
+
+static int
+kgsl_gem_create_mmap_offset(struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_kgsl_gem_object *priv = obj->driver_private;
+ struct drm_map_list *list;
+ int msize;
+
+ list = &obj->map_list;
+ list->map = kzalloc(sizeof(struct drm_map_list), GFP_KERNEL);
+ if (list->map == NULL) {
+ DRM_ERROR("Unable to allocate drm_map_list\n");
+ return -ENOMEM;
+ }
+
+ msize = obj->size * priv->bufcount;
+
+ list->map->type = _DRM_GEM;
+ list->map->size = msize;
+ list->map->handle = obj;
+
+ /* Allocate a mmap offset */
+ list->file_offset_node = drm_mm_search_free(&mm->offset_manager,
+ msize / PAGE_SIZE,
+ 0, 0);
+
+ if (!list->file_offset_node) {
+ DRM_ERROR("Failed to allocate offset for %d\n", obj->name);
+ kfree(list->map);
+ return -ENOMEM;
+ }
+
+ list->file_offset_node = drm_mm_get_block(list->file_offset_node,
+ msize / PAGE_SIZE, 0);
+
+ if (!list->file_offset_node) {
+ DRM_ERROR("Unable to create the file_offset_node\n");
+ kfree(list->map);
+ return -ENOMEM;
+ }
+
+ list->hash.key = list->file_offset_node->start;
+ if (drm_ht_insert_item(&mm->offset_hash, &list->hash)) {
+ DRM_ERROR("Failed to add to map hash\n");
+ drm_mm_put_block(list->file_offset_node);
+ kfree(list->map);
+ return -ENOMEM;
+ }
+
+ priv->mmap_offset = ((uint64_t) list->hash.key) << PAGE_SHIFT;
+
+ return 0;
+}
+
+int
+kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
+ unsigned long *len)
+{
+ struct file *filp;
+ struct drm_device *dev;
+ struct drm_file *file_priv;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = 0;
+
+ filp = fget(drm_fd);
+ if (unlikely(filp == NULL)) {
+ DRM_ERROR("Unable to get the DRM file descriptor\n");
+ return -EINVAL;
+ }
+ file_priv = filp->private_data;
+ if (unlikely(file_priv == NULL)) {
+ DRM_ERROR("Unable to get the file private data\n");
+ fput(filp);
+ return -EINVAL;
+ }
+ dev = file_priv->minor->dev;
+ if (unlikely(dev == NULL)) {
+ DRM_ERROR("Unable to get the minor device\n");
+ fput(filp);
+ return -EINVAL;
+ }
+
+ obj = drm_gem_object_lookup(dev, file_priv, handle);
+ if (unlikely(obj == NULL)) {
+ DRM_ERROR("Invalid GEM handle %x\n", handle);
+ fput(filp);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ /* We can only use the MDP for PMEM regions */
+
+ if (TYPE_IS_PMEM(priv->type)) {
+ *start = priv->memdesc.physaddr +
+ priv->bufs[priv->active].offset;
+
+ *len = priv->memdesc.size;
+
+ kgsl_gem_mem_flush(&priv->memdesc,
+ priv->type, DRM_KGSL_GEM_CACHE_OP_TO_DEV);
+ } else {
+ *start = 0;
+ *len = 0;
+ ret = -EINVAL;
+ }
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ fput(filp);
+ return ret;
+}
+
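+/*
+ * Illustrative usage sketch, not part of this patch: an external driver
+ * such as the MDP can resolve a GEM handle shared over a DRM fd into a
+ * physical address range with kgsl_gem_obj_addr(); the names below are
+ * hypothetical.
+ *
+ * unsigned long start, len;
+ *
+ * if (kgsl_gem_obj_addr(drm_fd, handle, &start, &len) == 0)
+ * mdp_program_overlay(start, len);
+ */
+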
+static int
+kgsl_gem_init_obj(struct drm_device *dev,
+ struct drm_file *file_priv,
+ struct drm_gem_object *obj,
+ int *handle)
+{
+ struct drm_kgsl_gem_object *priv;
+ int ret, i;
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ memset(&priv->memdesc, 0, sizeof(priv->memdesc));
+ priv->bufcount = 1;
+ priv->active = 0;
+ priv->bound = 0;
+
+ /* To preserve backwards compatibility, the default memory source
+ is EBI */
+
+ priv->type = DRM_KGSL_GEM_TYPE_PMEM | DRM_KGSL_GEM_PMEM_EBI;
+
+ ret = drm_gem_handle_create(file_priv, obj, handle);
+
+ drm_gem_object_unreference(obj);
+ INIT_LIST_HEAD(&priv->wait_list);
+
+ for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) {
+ INIT_LIST_HEAD((struct list_head *) &priv->wait_entries[i]);
+ priv->wait_entries[i].pid = 0;
+ init_waitqueue_head(&priv->wait_entries[i].process_wait_q);
+ }
+
+ for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) {
+ INIT_LIST_HEAD((struct list_head *) &priv->fence_entries[i]);
+ priv->fence_entries[i].in_use = 0;
+ priv->fence_entries[i].gem_obj = obj;
+ }
+
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+}
+
+int
+kgsl_gem_create_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_create *create = data;
+ struct drm_gem_object *obj;
+ int ret, handle;
+
+ /* Page align the size so we can allocate multiple buffers */
+ create->size = ALIGN(create->size, 4096);
+
+ obj = drm_gem_object_alloc(dev, create->size);
+
+ if (obj == NULL) {
+ DRM_ERROR("Unable to allocate the GEM object\n");
+ return -ENOMEM;
+ }
+
+ ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
+ if (ret)
+ return ret;
+
+ create->handle = handle;
+ return 0;
+}
+
+int
+kgsl_gem_create_fd_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_create_fd *args = data;
+ struct file *file;
+ dev_t rdev;
+ struct fb_info *info;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret, put_needed, handle;
+
+ file = fget_light(args->fd, &put_needed);
+
+ if (file == NULL) {
+ DRM_ERROR("Unable to get the file object\n");
+ return -EBADF;
+ }
+
+ rdev = file->f_dentry->d_inode->i_rdev;
+
+ /* Only framebuffer objects are supported ATM */
+
+ if (MAJOR(rdev) != FB_MAJOR) {
+ DRM_ERROR("File descriptor is not a framebuffer\n");
+ ret = -EBADF;
+ goto error_fput;
+ }
+
+ info = registered_fb[MINOR(rdev)];
+
+ if (info == NULL) {
+ DRM_ERROR("Framebuffer minor %d is not registered\n",
+ MINOR(rdev));
+ ret = -EBADF;
+ goto error_fput;
+ }
+
+ obj = drm_gem_object_alloc(dev, info->fix.smem_len);
+
+ if (obj == NULL) {
+ DRM_ERROR("Unable to allocate GEM object\n");
+ ret = -ENOMEM;
+ goto error_fput;
+ }
+
+ ret = kgsl_gem_init_obj(dev, file_priv, obj, &handle);
+
+ if (ret)
+ goto error_fput;
+
+ mutex_lock(&dev->struct_mutex);
+
+ priv = obj->driver_private;
+ priv->memdesc.physaddr = info->fix.smem_start;
+ priv->type = DRM_KGSL_GEM_TYPE_FD_FBMEM;
+
+ mutex_unlock(&dev->struct_mutex);
+ args->handle = handle;
+
+error_fput:
+ fput_light(file, put_needed);
+
+ return ret;
+}
+
+int
+kgsl_gem_setmemtype_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_memtype *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = 0;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ if (TYPE_IS_FD(priv->type))
+ ret = -EINVAL;
+ else {
+ if (TYPE_IS_PMEM(args->type) || TYPE_IS_MEM(args->type))
+ priv->type = args->type;
+ else
+ ret = -EINVAL;
+ }
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+int
+kgsl_gem_getmemtype_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_memtype *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ args->type = priv->type;
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return 0;
+}
+
+int
+kgsl_gem_unbind_gpu_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ return 0;
+}
+
+int
+kgsl_gem_bind_gpu_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ return 0;
+}
+
+/* Allocate the memory and prepare it for CPU mapping */
+
+int
+kgsl_gem_alloc_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_alloc *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ ret = kgsl_gem_alloc_memory(obj);
+
+ if (ret) {
+ DRM_ERROR("Unable to allocate object memory\n");
+ } else if (!priv->mmap_offset) {
+ ret = kgsl_gem_create_mmap_offset(obj);
+ if (ret)
+ DRM_ERROR("Unable to create a mmap offset\n");
+ }
+
+ args->offset = priv->mmap_offset;
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
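+/*
+ * Illustrative only, not part of this patch: from userspace the expected
+ * sequence for getting a CPU mapping of a KGSL GEM buffer is roughly
+ *
+ * KGSL_GEM_CREATE -> KGSL_GEM_SETMEMTYPE (optional) ->
+ * KGSL_GEM_ALLOC -> KGSL_GEM_MMAP
+ *
+ * matching the entries in the kgsl_drm_ioctls table at the end of this
+ * file; the exact ioctl request macros live in the DRM/KGSL headers.
+ */
+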
+int
+kgsl_gem_mmap_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_mmap *args = data;
+ struct drm_gem_object *obj;
+ unsigned long addr;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ down_write(&current->mm->mmap_sem);
+
+ addr = do_mmap(obj->filp, 0, args->size,
+ PROT_READ | PROT_WRITE, MAP_SHARED,
+ args->offset);
+
+ up_write(&current->mm->mmap_sem);
+
+ mutex_lock(&dev->struct_mutex);
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ if (IS_ERR((void *) addr))
+ return addr;
+
+ args->hostptr = (uint32_t) addr;
+ return 0;
+}
+
+/* This function is deprecated */
+
+int
+kgsl_gem_prep_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_prep *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ ret = kgsl_gem_alloc_memory(obj);
+ if (ret) {
+ DRM_ERROR("Unable to allocate object memory\n");
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+ }
+
+ if (priv->mmap_offset == 0) {
+ ret = kgsl_gem_create_mmap_offset(obj);
+ if (ret) {
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+ return ret;
+ }
+ }
+
+ args->offset = priv->mmap_offset;
+ args->phys = priv->memdesc.physaddr;
+
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return 0;
+}
+
+int
+kgsl_gem_get_bufinfo_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_bufinfo *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = -EINVAL;
+ int index;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ if (!kgsl_gem_memory_allocated(obj)) {
+ DRM_ERROR("Memory not allocated for this object\n");
+ goto out;
+ }
+
+ for (index = 0; index < priv->bufcount; index++) {
+ args->offset[index] = priv->bufs[index].offset;
+ args->gpuaddr[index] = priv->bufs[index].gpuaddr;
+ }
+
+ args->count = priv->bufcount;
+ args->active = priv->active;
+
+ ret = 0;
+
+out:
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+int
+kgsl_gem_set_bufcount_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_bufcount *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = -EINVAL;
+
+ if (args->bufcount < 1 || args->bufcount > DRM_KGSL_GEM_MAX_BUFFERS)
+ return -EINVAL;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ /* It is too much math to worry about what happens if we are already
+ allocated, so just bail if we are */
+
+ if (kgsl_gem_memory_allocated(obj)) {
+ DRM_ERROR("Memory already allocated - cannot change"
+ "number of buffers\n");
+ goto out;
+ }
+
+ priv->bufcount = args->bufcount;
+ ret = 0;
+
+out:
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+int
+kgsl_gem_set_active_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_active *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ int ret = -EINVAL;
+
+ obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", args->handle);
+ return -EBADF;
+ }
+
+ mutex_lock(&dev->struct_mutex);
+ priv = obj->driver_private;
+
+ if (args->active < 0 || args->active >= priv->bufcount) {
+ DRM_ERROR("Invalid active buffer %d\n", args->active);
+ goto out;
+ }
+
+ priv->active = args->active;
+ ret = 0;
+
+out:
+ drm_gem_object_unreference(obj);
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+int kgsl_gem_kmem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
+ struct drm_kgsl_gem_object *priv;
+ unsigned long offset;
+ struct page *page;
+ int i;
+
+ mutex_lock(&dev->struct_mutex);
+
+ priv = obj->driver_private;
+
+ offset = (unsigned long) vmf->virtual_address - vma->vm_start;
+ i = offset >> PAGE_SHIFT;
+ page = sg_page(&(priv->memdesc.sg[i]));
+
+ if (!page) {
+ mutex_unlock(&dev->struct_mutex);
+ return VM_FAULT_SIGBUS;
+ }
+
+ get_page(page);
+ vmf->page = page;
+
+ mutex_unlock(&dev->struct_mutex);
+ return 0;
+}
+
+int kgsl_gem_phys_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct drm_gem_object *obj = vma->vm_private_data;
+ struct drm_device *dev = obj->dev;
+ struct drm_kgsl_gem_object *priv;
+ unsigned long offset, pfn;
+ int ret = 0;
+
+ offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >>
+ PAGE_SHIFT;
+
+ mutex_lock(&dev->struct_mutex);
+
+ priv = obj->driver_private;
+
+ pfn = (priv->memdesc.physaddr >> PAGE_SHIFT) + offset;
+ ret = vm_insert_pfn(vma,
+ (unsigned long) vmf->virtual_address, pfn);
+ mutex_unlock(&dev->struct_mutex);
+
+ switch (ret) {
+ case -ENOMEM:
+ case -EAGAIN:
+ return VM_FAULT_OOM;
+ case -EFAULT:
+ return VM_FAULT_SIGBUS;
+ default:
+ return VM_FAULT_NOPAGE;
+ }
+}
+
+static struct vm_operations_struct kgsl_gem_kmem_vm_ops = {
+ .fault = kgsl_gem_kmem_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+static struct vm_operations_struct kgsl_gem_phys_vm_ops = {
+ .fault = kgsl_gem_phys_fault,
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+};
+
+/* This is a clone of the standard drm_gem_mmap function modified to allow
+ us to properly map KMEM regions as well as the PMEM regions */
+
+int msm_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
+{
+ struct drm_file *priv = filp->private_data;
+ struct drm_device *dev = priv->minor->dev;
+ struct drm_gem_mm *mm = dev->mm_private;
+ struct drm_local_map *map = NULL;
+ struct drm_gem_object *obj;
+ struct drm_hash_item *hash;
+ struct drm_kgsl_gem_object *gpriv;
+ int ret = 0;
+
+ mutex_lock(&dev->struct_mutex);
+
+ if (drm_ht_find_item(&mm->offset_hash, vma->vm_pgoff, &hash)) {
+ mutex_unlock(&dev->struct_mutex);
+ return drm_mmap(filp, vma);
+ }
+
+ map = drm_hash_entry(hash, struct drm_map_list, hash)->map;
+ if (!map ||
+ ((map->flags & _DRM_RESTRICTED) && !capable(CAP_SYS_ADMIN))) {
+ ret = -EPERM;
+ goto out_unlock;
+ }
+
+ /* Check for valid size. */
+ if (map->size < vma->vm_end - vma->vm_start) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ obj = map->handle;
+
+ gpriv = obj->driver_private;
+
+ /* VM_PFNMAP is only for memory that doesn't use struct page
+ * in other words, not "normal" memory. If you try to use it
+ * with "normal" memory then the mappings don't get flushed. */
+
+ if (TYPE_IS_MEM(gpriv->type)) {
+ vma->vm_flags |= VM_RESERVED | VM_DONTEXPAND;
+ vma->vm_ops = &kgsl_gem_kmem_vm_ops;
+ } else {
+ vma->vm_flags |= VM_RESERVED | VM_IO | VM_PFNMAP |
+ VM_DONTEXPAND;
+ vma->vm_ops = &kgsl_gem_phys_vm_ops;
+ }
+
+ vma->vm_private_data = map->handle;
+
+
+ /* Take care of requested caching policy */
+ if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM ||
+ gpriv->type & DRM_KGSL_GEM_CACHE_MASK) {
+ if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACKWA)
+ vma->vm_page_prot =
+ pgprot_writebackwacache(vma->vm_page_prot);
+ else if (gpriv->type & DRM_KGSL_GEM_CACHE_WBACK)
+ vma->vm_page_prot =
+ pgprot_writebackcache(vma->vm_page_prot);
+ else if (gpriv->type & DRM_KGSL_GEM_CACHE_WTHROUGH)
+ vma->vm_page_prot =
+ pgprot_writethroughcache(vma->vm_page_prot);
+ else
+ vma->vm_page_prot =
+ pgprot_writecombine(vma->vm_page_prot);
+ } else {
+ if (gpriv->type == DRM_KGSL_GEM_TYPE_KMEM_NOCACHE)
+ vma->vm_page_prot =
+ pgprot_noncached(vma->vm_page_prot);
+ else
+ /* default pmem is WC */
+ vma->vm_page_prot =
+ pgprot_writecombine(vma->vm_page_prot);
+ }
+
+ /* flush out existing KMEM cached mappings if new ones are
+ * of uncached type */
+ if (IS_MEM_UNCACHED(gpriv->type))
+ kgsl_cache_range_op(&gpriv->memdesc,
+ KGSL_CACHE_OP_FLUSH);
+
+ /* Add the other memory types here */
+
+ /* Take a ref for this mapping of the object, so that the fault
+ * handler can dereference the mmap offset's pointer to the object.
+ * This reference is cleaned up by the corresponding vm_close
+ * (which should happen whether the vma was created by this call, or
+ * by a vm_open due to mremap or partial unmap or whatever).
+ */
+ drm_gem_object_reference(obj);
+
+ vma->vm_file = filp; /* Needed for drm_vm_open() */
+ drm_vm_open_locked(vma);
+
+out_unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+ return ret;
+}
+
+void
+cleanup_fence(struct drm_kgsl_gem_object_fence *fence, int check_waiting)
+{
+ int j;
+ struct drm_kgsl_gem_object_fence_list_entry *this_fence_entry = NULL;
+ struct drm_kgsl_gem_object *unlock_obj;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object_wait_list_entry *lock_next;
+
+ fence->ts_valid = 0;
+ fence->timestamp = -1;
+ fence->ts_device = -1;
+
+ /* Walk the list of buffers in this fence and clean up the */
+ /* references. Note that this can cause memory allocations */
+ /* to be freed */
+ for (j = fence->num_buffers; j > 0; j--) {
+ this_fence_entry =
+ (struct drm_kgsl_gem_object_fence_list_entry *)
+ fence->buffers_in_fence.prev;
+
+ this_fence_entry->in_use = 0;
+ obj = this_fence_entry->gem_obj;
+ unlock_obj = obj->driver_private;
+
+ /* Delete it from the list */
+
+ list_del(&this_fence_entry->list);
+
+ /* we are unlocking - see if there are other pids waiting */
+ if (check_waiting) {
+ if (!list_empty(&unlock_obj->wait_list)) {
+ lock_next =
+ (struct drm_kgsl_gem_object_wait_list_entry *)
+ unlock_obj->wait_list.prev;
+
+ list_del((struct list_head *)&lock_next->list);
+
+ unlock_obj->lockpid = 0;
+ wake_up_interruptible(
+ &lock_next->process_wait_q);
+ lock_next->pid = 0;
+
+ } else {
+ /* List is empty so set pid to 0 */
+ unlock_obj->lockpid = 0;
+ }
+ }
+
+ drm_gem_object_unreference(obj);
+ }
+ /* here all the buffers in the fence are released */
+ /* clear the fence entry */
+ fence->fence_id = ENTRY_EMPTY;
+}
+
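+/*
+ * Note on the fence lifecycle (summary, not new behaviour): entries start
+ * as ENTRY_EMPTY, carry a positive fence_id while the locked handles are
+ * attached, are marked ENTRY_NEEDS_CLEANUP by wakeup_fence_entries() when
+ * the timestamp expires, and are returned to ENTRY_EMPTY either by
+ * cleanup_fence() above or lazily by find_empty_fence() below.
+ */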
+int
+find_empty_fence(void)
+{
+ int i;
+
+ for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) {
+ if (gem_buf_fence[i].fence_id == ENTRY_EMPTY) {
+ gem_buf_fence[i].fence_id = fence_id++;
+ gem_buf_fence[i].ts_valid = 0;
+ INIT_LIST_HEAD(&(gem_buf_fence[i].buffers_in_fence));
+ if (fence_id == 0xFFFFFFF0)
+ fence_id = 1;
+ return i;
+ } else {
+
+ /* Look for entries to be cleaned up */
+ if (gem_buf_fence[i].fence_id == ENTRY_NEEDS_CLEANUP)
+ cleanup_fence(&gem_buf_fence[i], 0);
+ }
+ }
+
+ return ENTRY_EMPTY;
+}
+
+int
+find_fence(int index)
+{
+ int i;
+
+ for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) {
+ if (gem_buf_fence[i].fence_id == index)
+ return i;
+ }
+
+ return ENTRY_EMPTY;
+}
+
+void
+wakeup_fence_entries(struct drm_kgsl_gem_object_fence *fence)
+{
+ struct drm_kgsl_gem_object_fence_list_entry *this_fence_entry = NULL;
+ struct drm_kgsl_gem_object_wait_list_entry *lock_next;
+ struct drm_kgsl_gem_object *unlock_obj;
+ struct drm_gem_object *obj;
+
+ /* TS has expired when we get here */
+ fence->ts_valid = 0;
+ fence->timestamp = -1;
+ fence->ts_device = -1;
+
+ list_for_each_entry(this_fence_entry, &fence->buffers_in_fence, list) {
+ obj = this_fence_entry->gem_obj;
+ unlock_obj = obj->driver_private;
+
+ if (!list_empty(&unlock_obj->wait_list)) {
+ lock_next =
+ (struct drm_kgsl_gem_object_wait_list_entry *)
+ unlock_obj->wait_list.prev;
+
+ /* Unblock the pid */
+ lock_next->pid = 0;
+
+ /* Delete it from the list */
+ list_del((struct list_head *)&lock_next->list);
+
+ unlock_obj->lockpid = 0;
+ wake_up_interruptible(&lock_next->process_wait_q);
+
+ } else {
+ /* List is empty so set pid to 0 */
+ unlock_obj->lockpid = 0;
+ }
+ }
+ fence->fence_id = ENTRY_NEEDS_CLEANUP; /* Mark it as needing cleanup */
+}
+
+int
+kgsl_gem_lock_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ /* The purpose of this function is to lock a given set of handles. */
+ /* The driver will maintain a list of locked handles. */
+ /* If a request comes in for a handle that's locked the thread will */
+ /* block until it's no longer in use. */
+
+ struct drm_kgsl_gem_lock_handles *args = data;
+ struct drm_gem_object *obj;
+ struct drm_kgsl_gem_object *priv;
+ struct drm_kgsl_gem_object_fence_list_entry *this_fence_entry = NULL;
+ struct drm_kgsl_gem_object_fence *fence;
+ struct drm_kgsl_gem_object_wait_list_entry *lock_item;
+ int i, j;
+ int result = 0;
+ uint32_t *lock_list;
+ uint32_t *work_list = NULL;
+ int32_t fence_index;
+
+ /* copy in the data from user space */
+ lock_list = kzalloc(sizeof(uint32_t) * args->num_handles, GFP_KERNEL);
+ if (!lock_list) {
+ DRM_ERROR("Unable allocate memory for lock list\n");
+ result = -ENOMEM;
+ goto error;
+ }
+
+ if (copy_from_user(lock_list, args->handle_list,
+ sizeof(uint32_t) * args->num_handles)) {
+ DRM_ERROR("Unable to copy the lock list from the user\n");
+ result = -EFAULT;
+ goto free_handle_list;
+ }
+
+
+ work_list = lock_list;
+ mutex_lock(&dev->struct_mutex);
+
+ /* build the fence for this group of handles */
+ fence_index = find_empty_fence();
+ if (fence_index == ENTRY_EMPTY) {
+ DRM_ERROR("Unable to find a empty fence\n");
+ args->lock_id = 0xDEADBEEF;
+ result = -EFAULT;
+ goto out_unlock;
+ }
+
+ fence = &gem_buf_fence[fence_index];
+ gem_buf_fence[fence_index].num_buffers = args->num_handles;
+ args->lock_id = gem_buf_fence[fence_index].fence_id;
+
+ for (j = args->num_handles; j > 0; j--, lock_list++) {
+ obj = drm_gem_object_lookup(dev, file_priv, *lock_list);
+
+ if (obj == NULL) {
+ DRM_ERROR("Invalid GEM handle %x\n", *lock_list);
+ result = -EBADF;
+ goto out_unlock;
+ }
+
+ priv = obj->driver_private;
+ this_fence_entry = NULL;
+
+ /* get a fence entry to hook into the fence */
+ for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) {
+ if (!priv->fence_entries[i].in_use) {
+ this_fence_entry = &priv->fence_entries[i];
+ this_fence_entry->in_use = 1;
+ break;
+ }
+ }
+
+ if (this_fence_entry == NULL) {
+ fence->num_buffers = 0;
+ fence->fence_id = ENTRY_EMPTY;
+ args->lock_id = 0xDEADBEAD;
+ result = -EFAULT;
+ drm_gem_object_unreference(obj);
+ goto out_unlock;
+ }
+
+ /* We're trying to lock - add to a fence */
+ list_add((struct list_head *)this_fence_entry,
+ &gem_buf_fence[fence_index].buffers_in_fence);
+ if (priv->lockpid) {
+
+ if (priv->lockpid == args->pid) {
+ /* Now that operations run asynchronously, this */
+ /* happens when an op isn't done yet - the buffer */
+ /* is already locked by the calling pid */
+ continue;
+ }
+
+
+ /* if a pid already had it locked */
+ /* create and add to wait list */
+ for (i = 0; i < DRM_KGSL_HANDLE_WAIT_ENTRIES; i++) {
+ if (priv->wait_entries[i].in_use == 0) {
+ /* this one is empty */
+ lock_item = &priv->wait_entries[i];
+ lock_item->in_use = 1;
+ lock_item->pid = args->pid;
+ INIT_LIST_HEAD((struct list_head *)
+ &priv->wait_entries[i]);
+ break;
+ }
+ }
+
+ if (i == DRM_KGSL_HANDLE_WAIT_ENTRIES) {
+
+ result = -EFAULT;
+ drm_gem_object_unreference(obj);
+ goto out_unlock;
+ }
+
+ list_add_tail((struct list_head *)&lock_item->list,
+ &priv->wait_list);
+ mutex_unlock(&dev->struct_mutex);
+ /* here we need to block */
+ wait_event_interruptible_timeout(
+ priv->wait_entries[i].process_wait_q,
+ (priv->lockpid == 0),
+ msecs_to_jiffies(64));
+ mutex_lock(&dev->struct_mutex);
+ lock_item->in_use = 0;
+ }
+
+ /* Getting here means no one currently holds the lock */
+ priv->lockpid = args->pid;
+
+ args->lock_id = gem_buf_fence[fence_index].fence_id;
+ }
+ fence->lockpid = args->pid;
+
+out_unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+free_handle_list:
+ kfree(work_list);
+
+error:
+ return result;
+}
+
+int
+kgsl_gem_unlock_handle_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_unlock_handles *args = data;
+ int result = 0;
+ int32_t fence_index;
+
+ mutex_lock(&dev->struct_mutex);
+ fence_index = find_fence(args->lock_id);
+ if (fence_index == ENTRY_EMPTY) {
+ DRM_ERROR("Invalid lock ID: %x\n", args->lock_id);
+ result = -EFAULT;
+ goto out_unlock;
+ }
+
+ cleanup_fence(&gem_buf_fence[fence_index], 1);
+
+out_unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+ return result;
+}
+
+
+int
+kgsl_gem_unlock_on_ts_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file_priv)
+{
+ struct drm_kgsl_gem_unlock_on_ts *args = data;
+ int result = 0;
+ int ts_done = 0;
+ int32_t fence_index, ts_device;
+ struct drm_kgsl_gem_object_fence *fence;
+ struct kgsl_device *device;
+
+ if (args->type == DRM_KGSL_GEM_TS_3D)
+ ts_device = KGSL_DEVICE_3D0;
+ else if (args->type == DRM_KGSL_GEM_TS_2D)
+ ts_device = KGSL_DEVICE_2D0;
+ else {
+ result = -EINVAL;
+ goto error;
+ }
+
+ device = kgsl_get_device(ts_device);
+ ts_done = kgsl_check_timestamp(device, NULL, args->timestamp);
+
+ mutex_lock(&dev->struct_mutex);
+
+ fence_index = find_fence(args->lock_id);
+ if (fence_index == ENTRY_EMPTY) {
+ DRM_ERROR("Invalid lock ID: %x\n", args->lock_id);
+ result = -EFAULT;
+ goto out_unlock;
+ }
+
+ fence = &gem_buf_fence[fence_index];
+ fence->ts_device = ts_device;
+
+ if (!ts_done)
+ fence->ts_valid = 1;
+ else
+ cleanup_fence(fence, 1);
+
+
+out_unlock:
+ mutex_unlock(&dev->struct_mutex);
+
+error:
+ return result;
+}
+
+struct drm_ioctl_desc kgsl_drm_ioctls[] = {
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_CREATE, kgsl_gem_create_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_PREP, kgsl_gem_prep_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_SETMEMTYPE, kgsl_gem_setmemtype_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_GETMEMTYPE, kgsl_gem_getmemtype_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_BIND_GPU, kgsl_gem_bind_gpu_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_UNBIND_GPU, kgsl_gem_unbind_gpu_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_ALLOC, kgsl_gem_alloc_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_MMAP, kgsl_gem_mmap_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_GET_BUFINFO, kgsl_gem_get_bufinfo_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_BUFCOUNT,
+ kgsl_gem_set_bufcount_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_SET_ACTIVE, kgsl_gem_set_active_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_LOCK_HANDLE,
+ kgsl_gem_lock_handle_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_UNLOCK_HANDLE,
+ kgsl_gem_unlock_handle_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_UNLOCK_ON_TS,
+ kgsl_gem_unlock_on_ts_ioctl, 0),
+ DRM_IOCTL_DEF_DRV(KGSL_GEM_CREATE_FD, kgsl_gem_create_fd_ioctl,
+ DRM_MASTER),
+};
+
+static struct drm_driver driver = {
+ .driver_features = DRIVER_GEM,
+ .load = kgsl_drm_load,
+ .unload = kgsl_drm_unload,
+ .preclose = kgsl_drm_preclose,
+ .suspend = kgsl_drm_suspend,
+ .resume = kgsl_drm_resume,
+ .reclaim_buffers = drm_core_reclaim_buffers,
+ .gem_init_object = kgsl_gem_init_object,
+ .gem_free_object = kgsl_gem_free_object,
+ .ioctls = kgsl_drm_ioctls,
+
+ .fops = {
+ .owner = THIS_MODULE,
+ .open = drm_open,
+ .release = drm_release,
+ .unlocked_ioctl = drm_ioctl,
+ .mmap = msm_drm_gem_mmap,
+ .poll = drm_poll,
+ .fasync = drm_fasync,
+ },
+
+ .name = DRIVER_NAME,
+ .desc = DRIVER_DESC,
+ .date = DRIVER_DATE,
+ .major = DRIVER_MAJOR,
+ .minor = DRIVER_MINOR,
+ .patchlevel = DRIVER_PATCHLEVEL,
+};
+
+int kgsl_drm_init(struct platform_device *dev)
+{
+ int i;
+
+ /* Only initialize once */
+ if (kgsl_drm_inited == DRM_KGSL_INITED)
+ return 0;
+
+ kgsl_drm_inited = DRM_KGSL_INITED;
+
+ driver.num_ioctls = DRM_ARRAY_SIZE(kgsl_drm_ioctls);
+
+ INIT_LIST_HEAD(&kgsl_mem_list);
+
+ for (i = 0; i < DRM_KGSL_NUM_FENCE_ENTRIES; i++) {
+ gem_buf_fence[i].num_buffers = 0;
+ gem_buf_fence[i].ts_valid = 0;
+ gem_buf_fence[i].fence_id = ENTRY_EMPTY;
+ }
+
+ return drm_platform_init(&driver, dev);
+}
+
+void kgsl_drm_exit(void)
+{
+ kgsl_drm_inited = DRM_KGSL_NOT_INITED;
+ drm_platform_exit(&driver, driver.kdriver.platform_device);
+}
diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c
new file mode 100644
index 0000000..6798eed
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_events.c
@@ -0,0 +1,324 @@
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <kgsl_device.h>
+
+#include "kgsl_trace.h"
+
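+/*
+ * _add_event_to_list - insert an event into a per-context or global event
+ * list, keeping the list sorted by expiring timestamp so that
+ * _process_event_list() can stop at the first entry that has not expired.
+ * (Descriptive comment only; no functional change.)
+ */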
+static void _add_event_to_list(struct list_head *head, struct kgsl_event *event)
+{
+ struct list_head *n;
+
+ for (n = head->next; n != head; n = n->next) {
+ struct kgsl_event *e =
+ list_entry(n, struct kgsl_event, list);
+
+ if (timestamp_cmp(e->timestamp, event->timestamp) > 0) {
+ list_add(&event->list, n->prev);
+ break;
+ }
+ }
+
+ if (n == head)
+ list_add_tail(&event->list, head);
+}
+
+/**
+ * kgsl_add_event - Add a new timestamp event for the KGSL device
+ * @device - KGSL device for the new event
+ * @id - the context ID that the event should be added to
+ * @ts - the timestamp to trigger the event on
+ * @cb - callback function to call when the timestamp expires
+ * @priv - private data for the specific event type
+ * @owner - driver instance that owns this event
+ *
+ * @returns - 0 on success or error code on failure
+ */
+int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
+ void (*cb)(struct kgsl_device *, void *, u32, u32), void *priv,
+ void *owner)
+{
+ struct kgsl_event *event;
+ unsigned int cur_ts;
+ struct kgsl_context *context = NULL;
+
+ if (cb == NULL)
+ return -EINVAL;
+
+ if (id != KGSL_MEMSTORE_GLOBAL) {
+ context = idr_find(&device->context_idr, id);
+ if (context == NULL)
+ return -EINVAL;
+ }
+ cur_ts = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
+
+ /*
+ * Check to see if the requested timestamp has already fired. If it
+ * did, do the callback right away. Make sure to send the timestamp that
+ * the event expected instead of the current timestamp because sometimes
+ * the event handlers can get confused.
+ */
+
+ if (timestamp_cmp(cur_ts, ts) >= 0) {
+ trace_kgsl_fire_event(id, ts, 0);
+ cb(device, priv, id, ts);
+ return 0;
+ }
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (event == NULL)
+ return -ENOMEM;
+
+ event->context = context;
+ event->timestamp = ts;
+ event->priv = priv;
+ event->func = cb;
+ event->owner = owner;
+ event->created = jiffies;
+
+ trace_kgsl_register_event(id, ts);
+
+ /* inc refcount to avoid race conditions in cleanup */
+ if (context)
+ kgsl_context_get(context);
+
+ /* Add the event to either the owning context or the global list */
+
+ if (context) {
+ _add_event_to_list(&context->events, event);
+
+ /*
+ * Add it to the master list of contexts with pending events if
+ * it isn't already there
+ */
+
+ if (list_empty(&context->events_list))
+ list_add_tail(&context->events_list,
+ &device->events_pending_list);
+
+ } else
+ _add_event_to_list(&device->events, event);
+
+ /*
+ * Increase the active count on the device to avoid going into power
+ * saving modes while events are pending
+ */
+
+ device->active_cnt++;
+
+ queue_work(device->work_queue, &device->ts_expired_ws);
+ return 0;
+}
+EXPORT_SYMBOL(kgsl_add_event);
+
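+/*
+ * Illustrative usage sketch, not part of this patch; the callback and the
+ * 'data'/'owner' pointers below are hypothetical:
+ *
+ * static void my_retire_cb(struct kgsl_device *device, void *priv,
+ * u32 id, u32 ts)
+ * {
+ * ... release whatever was tied to timestamp 'ts' ...
+ * }
+ *
+ * ret = kgsl_add_event(device, KGSL_MEMSTORE_GLOBAL, ts,
+ * my_retire_cb, data, owner);
+ */
+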
+/**
+ * kgsl_cancel_events_ctxt - Cancel all events for a context
+ * @device - KGSL device for the events to cancel
+ * @context - context whose events we want to cancel
+ *
+ */
+void kgsl_cancel_events_ctxt(struct kgsl_device *device,
+ struct kgsl_context *context)
+{
+ struct kgsl_event *event, *event_tmp;
+ unsigned int id, cur;
+
+ cur = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
+ id = context->id;
+
+ list_for_each_entry_safe(event, event_tmp, &context->events, list) {
+ /*
+ * "cancel" the events by calling their callback.
+ * Currently, events are used for lock and memory
+ * management, so if the process is dying the right
+ * thing to do is release or free.
+ *
+ * Send the current timestamp so the event knows how far the
+ * system got before the event was canceled
+ */
+
+ trace_kgsl_fire_event(id, cur, jiffies - event->created);
+
+ if (event->func)
+ event->func(device, event->priv, id, cur);
+
+ kgsl_context_put(context);
+ list_del(&event->list);
+ kfree(event);
+
+ kgsl_active_count_put(device);
+ }
+
+ /* Remove ourselves from the master pending list */
+ list_del_init(&context->events_list);
+}
+
+/**
+ * kgsl_cancel_events - Cancel all generic events for a process
+ * @device - KGSL device for the events to cancel
+ * @owner - driver instance that owns the events to cancel
+ *
+ */
+void kgsl_cancel_events(struct kgsl_device *device,
+ void *owner)
+{
+ struct kgsl_event *event, *event_tmp;
+ unsigned int cur;
+
+ cur = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
+
+ list_for_each_entry_safe(event, event_tmp, &device->events, list) {
+ if (event->owner != owner)
+ continue;
+
+ /*
+ * "cancel" the events by calling their callback.
+ * Currently, events are used for lock and memory
+ * management, so if the process is dying the right
+ * thing to do is release or free. Send the current timestamp so
+ * the callback knows how far the GPU made it before things went
+ * wrong.
+ */
+
+ trace_kgsl_fire_event(KGSL_MEMSTORE_GLOBAL, cur,
+ jiffies - event->created);
+
+ if (event->func)
+ event->func(device, event->priv, KGSL_MEMSTORE_GLOBAL,
+ cur);
+
+ if (event->context)
+ kgsl_context_put(event->context);
+
+ list_del(&event->list);
+ kfree(event);
+
+ kgsl_active_count_put(device);
+ }
+}
+EXPORT_SYMBOL(kgsl_cancel_events);
+
+static void _process_event_list(struct kgsl_device *device,
+ struct list_head *head, unsigned int timestamp)
+{
+ struct kgsl_event *event, *tmp;
+ unsigned int id;
+
+ list_for_each_entry_safe(event, tmp, head, list) {
+ if (timestamp_cmp(timestamp, event->timestamp) < 0)
+ break;
+
+ id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
+
+ /*
+ * Send the timestamp of the expired event, not the current
+ * timestamp. This prevents the event handlers from getting
+ * confused if they don't bother comparing the current timestamp
+ * to the timestamp they wanted
+ */
+
+ trace_kgsl_fire_event(id, event->timestamp,
+ jiffies - event->created);
+
+ if (event->func)
+ event->func(device, event->priv, id, event->timestamp);
+
+ if (event->context)
+ kgsl_context_put(event->context);
+
+ list_del(&event->list);
+ kfree(event);
+
+ kgsl_active_count_put(device);
+ }
+}
+
+static inline int _mark_next_event(struct kgsl_device *device,
+ struct list_head *head)
+{
+ struct kgsl_event *event;
+
+ if (!list_empty(head)) {
+ event = list_first_entry(head, struct kgsl_event, list);
+
+ /*
+ * Next event will return 0 if the event was marked or 1 if the
+ * timestamp on the event has passed - return that up a layer
+ */
+
+ return device->ftbl->next_event(device, event);
+ }
+
+ return 0;
+}
+
+static int kgsl_process_context_events(struct kgsl_device *device,
+ struct kgsl_context *context)
+{
+ while (1) {
+ unsigned int timestamp = kgsl_readtimestamp(device, context,
+ KGSL_TIMESTAMP_RETIRED);
+
+ _process_event_list(device, &context->events, timestamp);
+
+ /*
+ * _mark_next_event() will return 1 as long as the next event
+ * timestamp has expired - this is to cope with an unavoidable
+ * race condition with the GPU that is still processing events.
+ */
+
+ if (!_mark_next_event(device, &context->events))
+ break;
+ }
+
+ /*
+ * Return 0 if the list is empty so the calling function can remove the
+ * context from the pending list
+ */
+
+ return list_empty(&context->events) ? 0 : 1;
+}
+
+void kgsl_process_events(struct work_struct *work)
+{
+ struct kgsl_device *device = container_of(work, struct kgsl_device,
+ ts_expired_ws);
+ struct kgsl_context *context, *tmp;
+ uint32_t timestamp;
+
+ mutex_lock(&device->mutex);
+
+ /* Process expired global events */
+ timestamp = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
+ _process_event_list(device, &device->events, timestamp);
+ _mark_next_event(device, &device->events);
+
+ /* Now process all of the pending contexts */
+ list_for_each_entry_safe(context, tmp, &device->events_pending_list,
+ events_list) {
+
+ /*
+ * If kgsl_process_context_events() returns 0 then it no longer
+ * has any pending events and can be removed from the list
+ */
+
+ if (kgsl_process_context_events(device, context) == 0)
+ list_del_init(&context->events_list);
+ }
+
+ mutex_unlock(&device->mutex);
+}
+EXPORT_SYMBOL(kgsl_process_events);
diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c
index 33f242b..8f28505 100644
--- a/drivers/gpu/msm/kgsl_gpummu.c
+++ b/drivers/gpu/msm/kgsl_gpummu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -180,6 +180,13 @@
return NULL;
}
+/**
+ * kgsl_ptpool_add
+ * @pool: A pointer to a ptpool structure
+ * @count: Number of entries to add
+ *
+ * Add static entries to the pagetable pool.
+ */
static int
kgsl_ptpool_add(struct kgsl_ptpool *pool, int count)
@@ -189,11 +196,15 @@
mutex_lock(&pool->lock);
+ /* Only 4MB can be allocated in one chunk, so larger allocations
+ need to be split into multiple sections */
while (count) {
int entries = ((count * pool->ptsize) > SZ_4M) ?
SZ_4M / pool->ptsize : count;
+ /* Add the entries as static, i.e. they don't ever stand
+ a chance of being removed */
ret = _kgsl_ptpool_add_entries(pool, entries, 0);
if (ret)
@@ -206,6 +217,14 @@
return ret;
}
+/**
+ * kgsl_ptpool_alloc
+ * @pool: A pointer to a ptpool structure
+ * @physaddr: A pointer to store the physical address of the chunk
+ *
+ * Allocate a pagetable from the pool. Returns the virtual address
+ * of the pagetable, the physical address is returned in physaddr
+ */
static void *kgsl_ptpool_alloc(struct kgsl_ptpool *pool,
unsigned int *physaddr)
@@ -218,7 +237,7 @@
if (addr)
goto done;
-
+ /* Add a chunk for 1 more pagetable and mark it as dynamic */
ret = _kgsl_ptpool_add_entries(pool, 1, 1);
if (ret)
@@ -241,6 +260,13 @@
kfree(chunk);
}
+/**
+ * kgsl_ptpool_free
+ * @pool: A pointer to a ptpool structure
+ * @addr: A pointer to the virtual address to free
+ *
+ * Free a pagetable allocated from the pool
+ */
static void kgsl_ptpool_free(struct kgsl_ptpool *pool, void *addr)
{
@@ -286,6 +312,13 @@
kfree(pool);
}
+/**
+ * kgsl_ptpool_init
+ * @pool: A pointer to a ptpool structure to initialize
+ * @entries: The number of initial entries to add to the pool
+ *
+ * Initialize a pool and allocate an initial chunk of entries.
+ */
void *kgsl_gpummu_ptpool_init(int entries)
{
int ptsize = KGSL_PAGETABLE_SIZE;
@@ -322,8 +355,9 @@
return NULL;
}
-int kgsl_gpummu_pt_equal(struct kgsl_pagetable *pt,
- unsigned int pt_base)
+int kgsl_gpummu_pt_equal(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt,
+ unsigned int pt_base)
{
struct kgsl_gpummu_pt *gpummu_pt = pt ? pt->priv : NULL;
return gpummu_pt && pt_base && (gpummu_pt->base.gpuaddr == pt_base);
@@ -376,10 +410,10 @@
KGSL_MEM_CRIT(mmu->device,
"mmu page fault: page=0x%lx pt=%d op=%s axi=%d\n",
reg & ~(PAGE_SIZE - 1),
- kgsl_mmu_get_ptname_from_ptbase(ptbase),
+ kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
reg & 0x02 ? "WRITE" : "READ", (reg >> 4) & 0xF);
trace_kgsl_mmu_pagefault(mmu->device, reg & ~(PAGE_SIZE - 1),
- kgsl_mmu_get_ptname_from_ptbase(ptbase),
+ kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
reg & 0x02 ? "WRITE" : "READ");
}
@@ -412,6 +446,8 @@
if (gpummu_pt->base.hostptr == NULL)
goto err_flushfilter;
+ /* ptpool allocations are from coherent memory, so update the
+ device statistics accordingly */
KGSL_STATS_ADD(KGSL_PAGETABLE_SIZE, kgsl_driver.stats.coherent,
kgsl_driver.stats.coherent_max);
@@ -444,7 +480,7 @@
}
if (flags & KGSL_MMUFLAGS_TLBFLUSH) {
-
+ /* Invalidate all and tc */
kgsl_regwrite(mmu->device, MH_MMU_INVALIDATE, 0x00000003);
}
}
@@ -454,11 +490,17 @@
unsigned int context_id)
{
if (mmu->flags & KGSL_FLAGS_STARTED) {
+ /* page table not current, then setup mmu to use new
+ * specified page table
+ */
if (mmu->hwpagetable != pagetable) {
mmu->hwpagetable = pagetable;
+ /* Since we do a TLB flush the tlb_flags should
+ * be cleared by calling kgsl_mmu_pt_get_flags
+ */
kgsl_mmu_pt_get_flags(pagetable, mmu->device->id);
-
+ /* call device specific set page table */
kgsl_setstate(mmu, context_id, KGSL_MMUFLAGS_TLBFLUSH |
KGSL_MMUFLAGS_PTUPDATE);
}
@@ -467,11 +509,16 @@
static int kgsl_gpummu_init(struct kgsl_mmu *mmu)
{
+ /*
+ * initialize the device mmu
+ *
+ * call this with the global lock held
+ */
int status = 0;
-
+ /* sub-client MMU lookups require address translation */
if ((mmu->config & ~0x1) > 0) {
-
+ /* make sure the virtual address range is a multiple of 64KB */
if (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE & ((1 << 16) - 1)) {
KGSL_CORE_ERR("Invalid pagetable size requested "
"for GPUMMU: %x\n", CONFIG_MSM_KGSL_PAGE_TABLE_SIZE);
@@ -486,6 +533,11 @@
static int kgsl_gpummu_start(struct kgsl_mmu *mmu)
{
+ /*
+ * initialize the device mmu
+ *
+ * call this with the global lock held
+ */
struct kgsl_device *device = mmu->device;
struct kgsl_gpummu_pt *gpummu_pt;
@@ -493,23 +545,28 @@
if (mmu->flags & KGSL_FLAGS_STARTED)
return 0;
-
+ /* MMU not enabled */
if ((mmu->config & 0x1) == 0)
return 0;
-
+ /* setup MMU and sub-client behavior */
kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config);
-
+ /* idle device */
kgsl_idle(device);
-
+ /* enable axi interrupts */
kgsl_regwrite(device, MH_INTERRUPT_MASK,
GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT);
kgsl_sharedmem_set(&mmu->setstate_memory, 0, 0,
mmu->setstate_memory.size);
+ /* TRAN_ERROR needs a 32 byte (32 byte aligned) chunk of memory
+ * to complete transactions in case of an MMU fault. Note that
+ * we'll leave the bottom 32 bytes of the setstate_memory for other
+ * purposes (e.g. use it when dummy read cycles are needed
+ * for other blocks) */
kgsl_regwrite(device, MH_MMU_TRAN_ERROR,
mmu->setstate_memory.physaddr + 32);
@@ -517,7 +574,7 @@
mmu->defaultpagetable =
kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
-
+ /* Return error if the default pagetable doesn't exist */
if (mmu->defaultpagetable == NULL)
return -ENOMEM;
@@ -544,6 +601,9 @@
unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
struct kgsl_gpummu_pt *gpummu_pt = mmu_specific_pt;
+ /* All GPU addresses as assigned are page aligned, but some
+ functions perturb the gpuaddr with an offset, so apply the
+ mask here to make sure we have the right address */
unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK;
@@ -558,7 +618,7 @@
GSL_TLBFLUSH_FILTER_SETDIRTY(superpte / GSL_PT_SUPER_PTE);
for (pte = ptefirst; pte < ptelast; pte++) {
#ifdef VERBOSE_DEBUG
-
+ /* check if PTE exists */
if (!kgsl_pt_map_get(gpummu_pt, pte))
KGSL_CORE_ERR("pt entry %x is already "
"unmapped for pagetable %p\n", pte, gpummu_pt);
@@ -570,7 +630,7 @@
GSL_PT_SUPER_PTE);
}
-
+ /* Post all writes to the pagetable */
wmb();
return 0;
@@ -594,7 +654,7 @@
pte = kgsl_pt_entry_get(KGSL_PAGETABLE_BASE, memdesc->gpuaddr);
-
+ /* Flush the TLB if the first PTE isn't at the superpte boundary */
if (pte & (GSL_PT_SUPER_PTE - 1))
flushtlb = 1;
@@ -602,7 +662,7 @@
unsigned int paddr = kgsl_get_sg_pa(s);
unsigned int j;
-
+ /* Each sg entry might be multiple pages long */
for (j = paddr; j < paddr + s->length; pte++, j += PAGE_SIZE) {
if (SUPERPTE_IS_DIRTY(pte))
flushtlb = 1;
@@ -610,14 +670,14 @@
}
}
-
+ /* Flush the TLB if the last PTE isn't at the superpte boundary */
if ((pte + 1) & (GSL_PT_SUPER_PTE - 1))
flushtlb = 1;
wmb();
if (flushtlb) {
-
+ /* set all devices as needing flushing */
*tlb_flags = UINT_MAX;
GSL_TLBFLUSH_FILTER_RESET();
}
@@ -627,12 +687,16 @@
static void kgsl_gpummu_stop(struct kgsl_mmu *mmu)
{
- kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
mmu->flags &= ~KGSL_FLAGS_STARTED;
}
static int kgsl_gpummu_close(struct kgsl_mmu *mmu)
{
+ /*
+ * close device mmu
+ *
+ * call this with the global lock held
+ */
if (mmu->setstate_memory.gpuaddr)
kgsl_sharedmem_free(&mmu->setstate_memory);
@@ -651,12 +715,18 @@
}
static unsigned int
-kgsl_gpummu_pt_get_base_addr(struct kgsl_pagetable *pt)
+kgsl_gpummu_get_pt_base_addr(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt)
{
struct kgsl_gpummu_pt *gpummu_pt = pt->priv;
return gpummu_pt->base.gpuaddr;
}
+static int kgsl_gpummu_get_num_iommu_units(struct kgsl_mmu *mmu)
+{
+ return 1;
+}
+
struct kgsl_mmu_ops gpummu_ops = {
.mmu_init = kgsl_gpummu_init,
.mmu_close = kgsl_gpummu_close,
@@ -666,10 +736,13 @@
.mmu_device_setstate = kgsl_gpummu_default_setstate,
.mmu_pagefault = kgsl_gpummu_pagefault,
.mmu_get_current_ptbase = kgsl_gpummu_get_current_ptbase,
+ .mmu_pt_equal = kgsl_gpummu_pt_equal,
+ .mmu_get_pt_base_addr = kgsl_gpummu_get_pt_base_addr,
.mmu_enable_clk = NULL,
.mmu_disable_clk_on_ts = NULL,
.mmu_get_pt_lsb = NULL,
- .mmu_get_reg_map_desc = NULL,
+ .mmu_get_reg_gpuaddr = NULL,
+ .mmu_get_num_iommu_units = kgsl_gpummu_get_num_iommu_units,
};
struct kgsl_mmu_pt_ops gpummu_pt_ops = {
@@ -677,6 +750,4 @@
.mmu_unmap = kgsl_gpummu_unmap,
.mmu_create_pagetable = kgsl_gpummu_create_pagetable,
.mmu_destroy_pagetable = kgsl_gpummu_destroy_pagetable,
- .mmu_pt_equal = kgsl_gpummu_pt_equal,
- .mmu_pt_get_base_addr = kgsl_gpummu_pt_get_base_addr,
};
diff --git a/drivers/gpu/msm/kgsl_gpummu.h b/drivers/gpu/msm/kgsl_gpummu.h
index d49a430..99e7d5f 100644
--- a/drivers/gpu/msm/kgsl_gpummu.h
+++ b/drivers/gpu/msm/kgsl_gpummu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -21,6 +21,7 @@
(MH_INTERRUPT_MASK__AXI_READ_ERROR | \
MH_INTERRUPT_MASK__AXI_WRITE_ERROR)
+/* Macros to manage TLB flushing */
#define GSL_TLBFLUSH_FILTER_ENTRY_NUMBITS (sizeof(unsigned char) * 8)
#define GSL_TLBFLUSH_FILTER_GET(superpte) \
(*((unsigned char *) \
@@ -46,7 +47,7 @@
struct kgsl_gpummu_pt {
struct kgsl_memdesc base;
unsigned int last_superpte;
-
+ /* Maintain filter to manage tlb flushing */
struct kgsl_tlbflushfilter tlbflushfilter;
};
@@ -74,4 +75,4 @@
void *kgsl_gpummu_ptpool_init(int entries);
void kgsl_gpummu_ptpool_destroy(void *ptpool);
-#endif
+#endif /* __KGSL_GPUMMU_H */
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index dc517ae..f2393e4 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -33,9 +33,28 @@
#include "z180.h"
+static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
+ { 0, 0, 0 }, /* GLOBAL_BASE */
+ { 0x10, 0x0003FFFF, 14 }, /* TTBR0 */
+ { 0x14, 0x0003FFFF, 14 }, /* TTBR1 */
+ { 0x20, 0, 0 }, /* FSR */
+ { 0x800, 0, 0 }, /* TLBIALL */
+ { 0x820, 0, 0 }, /* RESUME */
+};
+
+static struct kgsl_iommu_register_list kgsl_iommuv2_reg[KGSL_IOMMU_REG_MAX] = {
+ { 0, 0, 0 }, /* GLOBAL_BASE */
+ { 0x20, 0x00FFFFFF, 14 }, /* TTBR0 */
+ { 0x28, 0x00FFFFFF, 14 }, /* TTBR1 */
+ { 0x58, 0, 0 }, /* FSR */
+ { 0x618, 0, 0 }, /* TLBIALL */
+ { 0x008, 0, 0 } /* RESUME */
+};
+
struct remote_iommu_petersons_spinlock kgsl_iommu_sync_lock_vars;
-static struct kgsl_iommu_unit *get_iommu_unit(struct device *dev)
+static int get_iommu_unit(struct device *dev, struct kgsl_mmu **mmu_out,
+ struct kgsl_iommu_unit **iommu_unit_out)
{
int i, j, k;
@@ -56,13 +75,16 @@
struct kgsl_iommu_unit *iommu_unit =
&iommu->iommu_units[j];
for (k = 0; k < iommu_unit->dev_count; k++) {
- if (iommu_unit->dev[k].dev == dev)
- return iommu_unit;
+ if (iommu_unit->dev[k].dev == dev) {
+ *mmu_out = mmu;
+ *iommu_unit_out = iommu_unit;
+ return 0;
+ }
}
}
}
- return NULL;
+ return -EINVAL;
}
static struct kgsl_iommu_device *get_iommu_device(struct kgsl_iommu_unit *unit,
@@ -81,47 +103,71 @@
static int kgsl_iommu_fault_handler(struct iommu_domain *domain,
struct device *dev, unsigned long addr, int flags)
{
- struct kgsl_iommu_unit *iommu_unit = get_iommu_unit(dev);
- struct kgsl_iommu_device *iommu_dev = get_iommu_device(iommu_unit, dev);
+ int ret = 0;
+ struct kgsl_mmu *mmu;
+ struct kgsl_iommu *iommu;
+ struct kgsl_iommu_unit *iommu_unit;
+ struct kgsl_iommu_device *iommu_dev;
unsigned int ptbase, fsr;
- static unsigned long last_pagefault_jiffies;
- static int last_pid;
- int current_pid;
- unsigned long wait_time_jiff = 0;
+ struct kgsl_device *device;
+ struct adreno_device *adreno_dev;
+ unsigned int no_page_fault_log = 0;
+ ret = get_iommu_unit(dev, &mmu, &iommu_unit);
+ if (ret)
+ goto done;
+ iommu_dev = get_iommu_device(iommu_unit, dev);
if (!iommu_dev) {
KGSL_CORE_ERR("Invalid IOMMU device %p\n", dev);
- return -ENOSYS;
+ ret = -ENOSYS;
+ goto done;
}
+ iommu = mmu->priv;
+ device = mmu->device;
+ adreno_dev = ADRENO_DEVICE(device);
- wait_time_jiff = last_pagefault_jiffies + msecs_to_jiffies(500);
- last_pagefault_jiffies = jiffies;
+ ptbase = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
+ iommu_dev->ctx_id, TTBR0);
- ptbase = KGSL_IOMMU_GET_IOMMU_REG(iommu_unit->reg_map.hostptr,
- iommu_dev->ctx_id, TTBR0);
- current_pid = kgsl_mmu_get_ptname_from_ptbase(ptbase);
+ fsr = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
+ iommu_dev->ctx_id, FSR);
- if ((last_pid != current_pid) ||
- (time_after(jiffies, wait_time_jiff))
- ) {
- fsr = KGSL_IOMMU_GET_IOMMU_REG(iommu_unit->reg_map.hostptr,
- iommu_dev->ctx_id, FSR);
+ if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE)
+ no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr);
+ if (!no_page_fault_log) {
KGSL_MEM_CRIT(iommu_dev->kgsldev,
"GPU PAGE FAULT: addr = %lX pid = %d\n",
- addr, kgsl_mmu_get_ptname_from_ptbase(ptbase));
+ addr, kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase));
KGSL_MEM_CRIT(iommu_dev->kgsldev, "context = %d FSR = %X\n",
iommu_dev->ctx_id, fsr);
-
- last_pid = current_pid;
}
- trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
- kgsl_mmu_get_ptname_from_ptbase(ptbase), 0);
+ mmu->fault = 1;
+ iommu_dev->fault = 1;
- return 0;
+ trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
+ kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase), 0);
+
+ /*
+ * We do not want the h/w to resume fetching data from an iommu unit
+ * that has faulted; this is better for debugging as it will stall
+ * the GPU and trigger a snapshot. To stall the transaction, return
+ * an EBUSY error.
+ */
+ if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+ ret = -EBUSY;
+done:
+ return ret;
}
+/*
+ * kgsl_iommu_disable_clk - Disable iommu clocks
+ * @mmu - Pointer to mmu structure
+ *
+ * Disables iommu clocks
+ * Return - void
+ */
static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu)
{
struct kgsl_iommu *iommu = mmu->priv;
@@ -135,6 +181,8 @@
continue;
iommu_drvdata = dev_get_drvdata(
iommu_unit->dev[j].dev->parent);
+ if (iommu_drvdata->aclk)
+ clk_disable_unprepare(iommu_drvdata->aclk);
if (iommu_drvdata->clk)
clk_disable_unprepare(iommu_drvdata->clk);
clk_disable_unprepare(iommu_drvdata->pclk);
@@ -143,6 +191,18 @@
}
}
+/*
+ * kgsl_iommu_disable_clk_event - An event function that is executed when
+ * the required timestamp is reached. It disables the IOMMU clocks if
+ * the timestamp on which the clocks can be disabled has expired.
+ * @device - The kgsl device pointer
+ * @data - The data passed during event creation, it is the MMU pointer
+ * @id - Context ID, should always be KGSL_MEMSTORE_GLOBAL
+ * @ts - The current timestamp that has expired for the device
+ *
+ * Disables IOMMU clocks if timestamp has expired
+ * Return - void
+ */
static void kgsl_iommu_clk_disable_event(struct kgsl_device *device, void *data,
unsigned int id, unsigned int ts)
{
@@ -162,6 +222,9 @@
kgsl_iommu_disable_clk(mmu);
iommu->clk_event_queued = false;
} else {
+ /* add new event to fire when ts is reached, this can happen
+ * if we queued an event and someone requested the clocks to
+ * be disabled on a later timestamp */
if (kgsl_add_event(device, id, iommu->iommu_last_cmd_ts,
kgsl_iommu_clk_disable_event, mmu, mmu)) {
KGSL_DRV_ERR(device,
@@ -171,6 +234,20 @@
}
}
+/*
+ * kgsl_iommu_disable_clk_on_ts - Sets up event to disable IOMMU clocks
+ * @mmu - The kgsl MMU pointer
+ * @ts - Timestamp on which the clocks should be disabled
+ * @ts_valid - Indicates whether ts parameter is valid, if this parameter
+ * is false then it means that the calling function wants to disable the
+ * IOMMU clocks immediately without waiting for any timestamp
+ *
+ * Creates an event to disable the IOMMU clocks on timestamp and if event
+ * already exists then updates the timestamp of disabling the IOMMU clocks
+ * with the passed in ts if it is greater than the current value at which
+ * the clocks will be disabled
+ * Return - void
+ */
static void
kgsl_iommu_disable_clk_on_ts(struct kgsl_mmu *mmu, unsigned int ts,
bool ts_valid)
@@ -197,6 +274,14 @@
}
}
+/*
+ * kgsl_iommu_enable_clk - Enable iommu clocks
+ * @mmu - Pointer to mmu structure
+ * @ctx_id - The context bank whose clocks are to be turned on
+ *
+ * Enables iommu clocks of a given context
+ * Return: 0 on success else error code
+ */
static int kgsl_iommu_enable_clk(struct kgsl_mmu *mmu,
int ctx_id)
{
@@ -224,6 +309,17 @@
goto done;
}
}
+ if (iommu_drvdata->aclk) {
+ ret = clk_prepare_enable(iommu_drvdata->aclk);
+ if (ret) {
+ if (iommu_drvdata->clk)
+ clk_disable_unprepare(
+ iommu_drvdata->clk);
+ clk_disable_unprepare(
+ iommu_drvdata->pclk);
+ goto done;
+ }
+ }
iommu_unit->dev[j].clk_enabled = true;
}
}
@@ -233,21 +329,44 @@
return ret;
}
-static int kgsl_iommu_pt_equal(struct kgsl_pagetable *pt,
- unsigned int pt_base)
+/*
+ * kgsl_iommu_pt_equal - Check if pagetables are equal
+ * @mmu - Pointer to mmu structure
+ * @pt - Pointer to pagetable
+ * @pt_base - Address of a pagetable that the IOMMU register is
+ * programmed with
+ *
+ * Checks whether the pt_base is equal to the base address of
+ * the pagetable which is contained in the pt structure
+ * Return - Non-zero if the pagetable addresses are equal else 0
+ */
+static int kgsl_iommu_pt_equal(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt,
+ unsigned int pt_base)
{
+ struct kgsl_iommu *iommu = mmu->priv;
struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL;
unsigned int domain_ptbase = iommu_pt ?
iommu_get_pt_base_addr(iommu_pt->domain) : 0;
-
- domain_ptbase &= (KGSL_IOMMU_TTBR0_PA_MASK <<
- KGSL_IOMMU_TTBR0_PA_SHIFT);
- pt_base &= (KGSL_IOMMU_TTBR0_PA_MASK <<
- KGSL_IOMMU_TTBR0_PA_SHIFT);
+ /* Only compare the valid address bits of the pt_base */
+ domain_ptbase &=
+ (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
+
+ pt_base &=
+ (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
+
return domain_ptbase && pt_base &&
(domain_ptbase == pt_base);
}
+/*
+ * kgsl_iommu_destroy_pagetable - Free up resources held by a pagetable
+ * @mmu_specific_pt - Pointer to pagetable which is to be freed
+ *
+ * Return - void
+ */
static void kgsl_iommu_destroy_pagetable(void *mmu_specific_pt)
{
struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
@@ -256,6 +375,13 @@
kfree(iommu_pt);
}
+/*
+ * kgsl_iommu_create_pagetable - Create a IOMMU pagetable
+ *
+ * Allocate memory to hold a pagetable and allocate the IOMMU
+ * domain which is the actual IOMMU pagetable
+ * Return - Pointer to the new pagetable on success, NULL on failure
+ */
void *kgsl_iommu_create_pagetable(void)
{
struct kgsl_iommu_pt *iommu_pt;
@@ -266,8 +392,13 @@
sizeof(struct kgsl_iommu_pt));
return NULL;
}
- iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
- MSM_IOMMU_DOMAIN_PT_CACHEABLE);
+ /* L2 redirect is not stable on IOMMU v2 */
+ if (msm_soc_version_supports_iommu_v1())
+ iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
+ MSM_IOMMU_DOMAIN_PT_CACHEABLE);
+ else
+ iommu_pt->domain = iommu_domain_alloc(&platform_bus_type,
+ 0);
if (!iommu_pt->domain) {
KGSL_CORE_ERR("Failed to create iommu domain\n");
kfree(iommu_pt);
@@ -280,6 +411,18 @@
return iommu_pt;
}
+/*
+ * kgsl_detach_pagetable_iommu_domain - Detach the IOMMU unit from a
+ * pagetable
+ * @mmu - Pointer to the device mmu structure
+ * @priv - Flag indicating whether the private or user context is to be
+ * detached
+ *
+ * Detach the IOMMU unit with the domain that is contained in the
+ * hwpagetable of the given mmu. After detaching the IOMMU unit is not
+ * in use because the PTBR will not be set after a detach
+ * Return - void
+ */
static void kgsl_detach_pagetable_iommu_domain(struct kgsl_mmu *mmu)
{
struct kgsl_iommu_pt *iommu_pt;
@@ -290,6 +433,10 @@
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
iommu_pt = mmu->defaultpagetable->priv;
for (j = 0; j < iommu_unit->dev_count; j++) {
+ /*
+ * If there is a 2nd default pagetable then priv domain
+ * is attached with this pagetable
+ */
if (mmu->priv_bank_table &&
(KGSL_IOMMU_CONTEXT_PRIV == j))
iommu_pt = mmu->priv_bank_table->priv;
@@ -305,16 +452,37 @@
}
}
+/*
+ * kgsl_attach_pagetable_iommu_domain - Attach the IOMMU unit to a
+ * pagetable, i.e set the IOMMU's PTBR to the pagetable address and
+ * setup other IOMMU registers for the device so that it becomes
+ * active
+ * @mmu - Pointer to the device mmu structure
+ * @priv - Flag indicating whether the private or user context is to be
+ * attached
+ *
+ * Attach the IOMMU unit with the domain that is contained in the
+ * hwpagetable of the given mmu.
+ * Return - 0 on success else error code
+ */
static int kgsl_attach_pagetable_iommu_domain(struct kgsl_mmu *mmu)
{
struct kgsl_iommu_pt *iommu_pt;
struct kgsl_iommu *iommu = mmu->priv;
int i, j, ret = 0;
+ /*
+ * Loop through all the iommu devcies under all iommu units and
+ * attach the domain
+ */
for (i = 0; i < iommu->unit_count; i++) {
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
iommu_pt = mmu->defaultpagetable->priv;
for (j = 0; j < iommu_unit->dev_count; j++) {
+ /*
+ * If there is a 2nd default pagetable then priv domain
+ * is attached to this pagetable
+ */
if (mmu->priv_bank_table &&
(KGSL_IOMMU_CONTEXT_PRIV == j))
iommu_pt = mmu->priv_bank_table->priv;
@@ -339,6 +507,17 @@
return ret;
}
+/*
+ * _get_iommu_ctxs - Get device pointer to IOMMU contexts
+ * @mmu - Pointer to mmu device
+ * @data - Pointer to the platform data containing information about
+ * iommu devices for one iommu unit
+ * @unit_id - The IOMMU unit number. This is not a specific ID but just
+ * a serial number. The serial numbers are treated as IDs of the
+ * IOMMU units
+ *
+ * Return - 0 on success else error code
+ */
static int _get_iommu_ctxs(struct kgsl_mmu *mmu,
struct kgsl_device_iommu_data *data, unsigned int unit_id)
{
@@ -384,6 +563,12 @@
return 0;
}
+/*
+ * kgsl_iommu_init_sync_lock - Initialize the Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ *
+ * Return - 0 on success else error code
+ */
static int kgsl_iommu_init_sync_lock(struct kgsl_mmu *mmu)
{
struct kgsl_iommu *iommu = mmu->device->mmu.priv;
@@ -401,7 +586,7 @@
return -ENXIO;
}
-
+ /* Get the physical address of the Lock variables */
lock_phy_addr = (msm_iommu_lock_initialize()
- MSM_SHARED_RAM_BASE + msm_shared_ram_phys);
@@ -411,7 +596,7 @@
return -ENXIO;
}
-
+ /* Align the physical address to PAGE boundary and store the offset */
page_offset = (lock_phy_addr & (PAGE_SIZE - 1));
lock_phy_addr = (lock_phy_addr & ~(PAGE_SIZE - 1));
iommu->sync_lock_desc.physaddr = (unsigned int)lock_phy_addr;
@@ -425,8 +610,8 @@
if (status)
return status;
-
- iommu->sync_lock_desc.priv |= KGSL_MEMFLAGS_GLOBAL;
+ /* Map Lock variables to GPU pagetable */
+ iommu->sync_lock_desc.priv |= KGSL_MEMDESC_GLOBAL;
pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
mmu->defaultpagetable;
@@ -436,11 +621,11 @@
if (status) {
kgsl_mmu_unmap(pagetable, &iommu->sync_lock_desc);
- iommu->sync_lock_desc.priv &= ~~KGSL_MEMFLAGS_GLOBAL;
+ iommu->sync_lock_desc.priv &= ~KGSL_MEMDESC_GLOBAL;
return status;
}
-
+ /* Store Lock variables GPU address */
lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr + page_offset);
kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
@@ -454,12 +639,19 @@
iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
-
+ /* Flag Sync Lock is Initialized */
iommu->sync_lock_initialized = 1;
return status;
}
+/*
+ * kgsl_iommu_sync_lock - Acquire Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ * @cmds - Pointer to array of commands
+ *
+ * Return - int - number of commands.
+ */
inline unsigned int kgsl_iommu_sync_lock(struct kgsl_mmu *mmu,
unsigned int *cmds)
{
@@ -480,7 +672,7 @@
cmds += adreno_add_idle_cmds(adreno_dev, cmds);
*cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
-
+ /* MEM SPACE = memory, FUNCTION = equals */
*cmds++ = 0x13;
*cmds++ = lock_vars->flag[PROC_GPU];
*cmds++ = 0x1;
@@ -494,7 +686,7 @@
cmds += adreno_add_idle_cmds(adreno_dev, cmds);
*cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
-
+ /* MEM SPACE = memory, FUNCTION = equals */
*cmds++ = 0x13;
*cmds++ = lock_vars->flag[PROC_GPU];
*cmds++ = 0x1;
@@ -511,6 +703,13 @@
return cmds - start;
}
+/*
+ * kgsl_iommu_sync_unlock - Release Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ * @cmds - Pointer to array of commands
+ *
+ * Return - int - number of commands.
+ */
inline unsigned int kgsl_iommu_sync_unlock(struct kgsl_mmu *mmu,
unsigned int *cmds)
{
@@ -529,7 +728,7 @@
*cmds++ = 0;
*cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
-
+ /* MEM SPACE = memory, FUNCTION = equals */
*cmds++ = 0x13;
*cmds++ = lock_vars->flag[PROC_GPU];
*cmds++ = 0x0;
@@ -541,6 +740,14 @@
return cmds - start;
}
+/*
+ * kgsl_get_iommu_ctxt - Get device pointer to IOMMU contexts
+ * @mmu - Pointer to mmu device
+ *
+ * Get the device pointers for the IOMMU user and priv contexts of the
+ * kgsl device
+ * Return - 0 on success else error code
+ */
static int kgsl_get_iommu_ctxt(struct kgsl_mmu *mmu)
{
struct platform_device *pdev =
@@ -550,7 +757,7 @@
struct kgsl_iommu *iommu = mmu->device->mmu.priv;
int i, ret = 0;
-
+ /* Go through the IOMMU data and get all the context devices */
if (KGSL_IOMMU_MAX_UNITS < pdata_dev->iommu_count) {
KGSL_CORE_ERR("Too many IOMMU units defined\n");
ret = -EINVAL;
@@ -567,6 +774,13 @@
return ret;
}
+/*
+ * kgsl_set_register_map - Map the IOMMU registers in the memory descriptors
+ * of the respective iommu units
+ * @mmu - Pointer to mmu structure
+ *
+ * Return - 0 on success else error code
+ */
static int kgsl_set_register_map(struct kgsl_mmu *mmu)
{
struct platform_device *pdev =
@@ -580,7 +794,7 @@
for (; i < pdata_dev->iommu_count; i++) {
struct kgsl_device_iommu_data data = pdata_dev->iommu_data[i];
iommu_unit = &iommu->iommu_units[i];
-
+ /* set up the IOMMU register map for the given IOMMU unit */
if (!data.physstart || !data.physend) {
KGSL_CORE_ERR("The register range for IOMMU unit not"
" specified\n");
@@ -599,13 +813,15 @@
}
iommu_unit->reg_map.size = data.physend - data.physstart + 1;
iommu_unit->reg_map.physaddr = data.physstart;
- memdesc_sg_phys(&iommu_unit->reg_map, data.physstart,
+ ret = memdesc_sg_phys(&iommu_unit->reg_map, data.physstart,
iommu_unit->reg_map.size);
+ if (ret)
+ goto err;
}
iommu->unit_count = pdata_dev->iommu_count;
return ret;
err:
-
+ /* Unmap any mapped IOMMU regions */
for (; i >= 0; i--) {
iommu_unit = &iommu->iommu_units[i];
iounmap(iommu_unit->reg_map.hostptr);
@@ -615,12 +831,36 @@
return ret;
}
-static unsigned int kgsl_iommu_pt_get_base_addr(struct kgsl_pagetable *pt)
+/*
+ * kgsl_iommu_get_pt_base_addr - Get the address of the pagetable that the
+ * IOMMU ttbr0 register is programmed with
+ * @mmu - Pointer to mmu
+ * @pt - kgsl pagetable pointer that contains the IOMMU domain pointer
+ *
+ * Return - actual pagetable address that the ttbr0 register is programmed
+ * with
+ */
+static unsigned int kgsl_iommu_get_pt_base_addr(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt)
{
+ struct kgsl_iommu *iommu = mmu->priv;
struct kgsl_iommu_pt *iommu_pt = pt->priv;
- return iommu_get_pt_base_addr(iommu_pt->domain);
+ return iommu_get_pt_base_addr(iommu_pt->domain) &
+ (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
}
+/*
+ * kgsl_iommu_get_pt_lsb - Return the lsb of the ttbr0 IOMMU register
+ * @mmu - Pointer to mmu structure
+ * @unit_id - The IOMMU unit whose lsb value is to be returned
+ * @ctx_id - The context bank whose lsb value is to be returned
+ * Return - returns the lsb which is the last 14 bits of the ttbr0 IOMMU
+ * register. ttbr0 is the actual PTBR of the IOMMU. The last 14 bits
+ * are programmed only once, when a domain is attached, and do not
+ * change afterwards.
+ */
static int kgsl_iommu_get_pt_lsb(struct kgsl_mmu *mmu,
unsigned int unit_id,
enum kgsl_iommu_context_id ctx_id)
@@ -642,6 +882,9 @@
unsigned int context_id)
{
if (mmu->flags & KGSL_FLAGS_STARTED) {
+ /* If the page table is not current, then set up the mmu to use the new
+ * specified page table
+ */
if (mmu->hwpagetable != pagetable) {
unsigned int flags = 0;
mmu->hwpagetable = pagetable;
@@ -656,6 +899,11 @@
static int kgsl_iommu_init(struct kgsl_mmu *mmu)
{
+ /*
+ * initialize device mmu
+ *
+ * call this with the global lock held
+ */
int status = 0;
struct kgsl_iommu *iommu;
@@ -674,6 +922,19 @@
if (status)
goto done;
+ iommu->iommu_reg_list = kgsl_iommuv1_reg;
+ iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
+
+ if (msm_soc_version_supports_iommu_v1()) {
+ iommu->iommu_reg_list = kgsl_iommuv1_reg;
+ iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
+ } else {
+ iommu->iommu_reg_list = kgsl_iommuv2_reg;
+ iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V2;
+ }
+
+ /* A nop is required in an indirect buffer when switching
+ * pagetables in-stream */
kgsl_sharedmem_writel(&mmu->setstate_memory,
KGSL_IOMMU_SETSTATE_NOP_OFFSET,
cp_nop_packet(1));
@@ -688,41 +949,53 @@
return status;
}
+/*
+ * kgsl_iommu_setup_defaultpagetable - Set up the initial defaultpagetable
+ * for iommu. This function is only called once during the first start;
+ * successive starts do not call this function.
+ * @mmu - Pointer to mmu structure
+ *
+ * Create the initial defaultpagetable and setup the iommu mappings to it
+ * Return - 0 on success else error code
+ */
static int kgsl_iommu_setup_defaultpagetable(struct kgsl_mmu *mmu)
{
int status = 0;
int i = 0;
struct kgsl_iommu *iommu = mmu->priv;
- struct kgsl_iommu_pt *iommu_pt;
struct kgsl_pagetable *pagetable = NULL;
- if (!cpu_is_msm8960()) {
+ /* If chip is not 8960 then we use the 2nd context bank for pagetable
+ * switching on the 3D side for which a separate table is allocated */
+ if (!cpu_is_msm8960() && msm_soc_version_supports_iommu_v1()) {
mmu->priv_bank_table =
kgsl_mmu_getpagetable(KGSL_MMU_PRIV_BANK_TABLE_NAME);
if (mmu->priv_bank_table == NULL) {
status = -ENOMEM;
goto err;
}
- iommu_pt = mmu->priv_bank_table->priv;
}
mmu->defaultpagetable = kgsl_mmu_getpagetable(KGSL_MMU_GLOBAL_PT);
-
+ /* Return error if the default pagetable doesn't exist */
if (mmu->defaultpagetable == NULL) {
status = -ENOMEM;
goto err;
}
pagetable = mmu->priv_bank_table ? mmu->priv_bank_table :
mmu->defaultpagetable;
-
- for (i = 0; i < iommu->unit_count; i++) {
- iommu->iommu_units[i].reg_map.priv |= KGSL_MEMFLAGS_GLOBAL;
- status = kgsl_mmu_map(pagetable,
- &(iommu->iommu_units[i].reg_map),
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
- if (status) {
- iommu->iommu_units[i].reg_map.priv &=
- ~KGSL_MEMFLAGS_GLOBAL;
- goto err;
+ /* Map the IOMMU registers to the defaultpagetable only */
+ if (msm_soc_version_supports_iommu_v1()) {
+ for (i = 0; i < iommu->unit_count; i++) {
+ iommu->iommu_units[i].reg_map.priv |=
+ KGSL_MEMDESC_GLOBAL;
+ status = kgsl_mmu_map(pagetable,
+ &(iommu->iommu_units[i].reg_map),
+ GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ if (status) {
+ iommu->iommu_units[i].reg_map.priv &=
+ ~KGSL_MEMDESC_GLOBAL;
+ goto err;
+ }
}
}
return status;
@@ -730,7 +1003,7 @@
for (i--; i >= 0; i--) {
kgsl_mmu_unmap(pagetable,
&(iommu->iommu_units[i].reg_map));
- iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMFLAGS_GLOBAL;
+ iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMDESC_GLOBAL;
}
if (mmu->priv_bank_table) {
kgsl_mmu_putpagetable(mmu->priv_bank_table);
@@ -758,20 +1031,20 @@
if (status)
return -ENOMEM;
-
+ /* Initialize the sync lock between GPU and CPU */
if (msm_soc_version_supports_iommu_v1() &&
(device->id == KGSL_DEVICE_3D0))
kgsl_iommu_init_sync_lock(mmu);
}
+ /* We use the GPU MMU to control access to IOMMU registers on 8960 with
+ * a225, hence we still keep the MMU active on 8960 */
if (cpu_is_msm8960()) {
struct kgsl_mh *mh = &(mmu->device->mh);
kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000001);
kgsl_regwrite(mmu->device, MH_MMU_MPU_END,
mh->mpu_base +
- iommu->iommu_units
- [iommu->unit_count - 1].reg_map.gpuaddr -
- PAGE_SIZE);
+ iommu->iommu_units[0].reg_map.gpuaddr);
} else {
kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
}
@@ -793,14 +1066,20 @@
KGSL_CORE_ERR("clk enable failed\n");
goto done;
}
+ /* Get the lsb value of the pagetable address set in the IOMMU ttbr0
+ * register; that value does not change when we change pagetables, so
+ * while switching pagetables we can reuse this cached lsb value without
+ * having to read the register again
+ */
for (i = 0; i < iommu->unit_count; i++) {
struct kgsl_iommu_unit *iommu_unit = &iommu->iommu_units[i];
- for (j = 0; j < iommu_unit->dev_count; j++)
- iommu_unit->dev[j].pt_lsb = KGSL_IOMMMU_PT_LSB(
- KGSL_IOMMU_GET_IOMMU_REG(
- iommu_unit->reg_map.hostptr,
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ iommu_unit->dev[j].pt_lsb = KGSL_IOMMMU_PT_LSB(iommu,
+ KGSL_IOMMU_GET_CTX_REG(iommu,
+ iommu_unit,
iommu_unit->dev[j].ctx_id,
TTBR0));
+ }
}
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
@@ -823,6 +1102,9 @@
unsigned int range = kgsl_sg_size(memdesc->sg, memdesc->sglen);
struct kgsl_iommu_pt *iommu_pt = mmu_specific_pt;
+ /* All GPU addresses as assigned are page aligned, but some
+ functions perturb the gpuaddr with an offset, so apply the
+ mask here to make sure we have the right address */
unsigned int gpuaddr = memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK;
@@ -836,7 +1118,11 @@
range, ret);
#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
- if (!ret)
+ /*
+ * Flushing only required if per process pagetables are used. With
+ * global case, flushing will happen inside iommu_map function
+ */
+ if (!ret && msm_soc_version_supports_iommu_v1())
*tlb_flags = UINT_MAX;
#endif
return 0;
@@ -874,17 +1160,39 @@
static void kgsl_iommu_stop(struct kgsl_mmu *mmu)
{
struct kgsl_iommu *iommu = mmu->priv;
+ int i, j;
+ /*
+ * stop device mmu
+ *
+ * call this with the global lock held
+ */
if (mmu->flags & KGSL_FLAGS_STARTED) {
- kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
-
+ /* detach iommu attachment */
kgsl_detach_pagetable_iommu_domain(mmu);
mmu->hwpagetable = NULL;
mmu->flags &= ~KGSL_FLAGS_STARTED;
- }
-
+ if (mmu->fault) {
+ for (i = 0; i < iommu->unit_count; i++) {
+ struct kgsl_iommu_unit *iommu_unit =
+ &iommu->iommu_units[i];
+ for (j = 0; j < iommu_unit->dev_count; j++) {
+ if (iommu_unit->dev[j].fault) {
+ kgsl_iommu_enable_clk(mmu, j);
+ KGSL_IOMMU_SET_CTX_REG(iommu,
+ iommu_unit,
+ iommu_unit->dev[j].ctx_id,
+ RESUME, 1);
+ iommu_unit->dev[j].fault = 0;
+ }
+ }
+ }
+ mmu->fault = 0;
+ }
+ }
+ /* switch off MMU clocks and cancel any events it has queued */
iommu->clk_event_queued = false;
kgsl_cancel_events(mmu->device, mmu);
kgsl_iommu_disable_clk(mmu);
@@ -920,98 +1228,117 @@
{
unsigned int pt_base;
struct kgsl_iommu *iommu = mmu->priv;
+ /* We cannot enable or disable the clocks in interrupt context; this
+ function is called from interrupt context if there is an axi error */
if (in_interrupt())
return 0;
-
+ /* Return the current pt base by reading IOMMU pt_base register */
kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER);
- pt_base = readl_relaxed(iommu->iommu_units[0].reg_map.hostptr +
- (KGSL_IOMMU_CONTEXT_USER << KGSL_IOMMU_CTX_SHIFT) +
- KGSL_IOMMU_TTBR0);
+ pt_base = KGSL_IOMMU_GET_CTX_REG(iommu, (&iommu->iommu_units[0]),
+ KGSL_IOMMU_CONTEXT_USER,
+ TTBR0);
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
- return pt_base & (KGSL_IOMMU_TTBR0_PA_MASK <<
- KGSL_IOMMU_TTBR0_PA_SHIFT);
+ return pt_base &
+ (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
}
+/*
+ * kgsl_iommu_default_setstate - Change the IOMMU pagetable or flush IOMMU tlb
+ * of the primary context bank
+ * @mmu - Pointer to mmu structure
+ * @flags - Flags indicating whether the pagetable has to change or the tlb
+ * is to be flushed, or both
+ *
+ * Based on flags, set the new pagetable of the IOMMU unit or flush its tlb,
+ * or do both, by doing direct register writes to the IOMMU registers through
+ * the cpu
+ * Return - void
+ */
static void kgsl_iommu_default_setstate(struct kgsl_mmu *mmu,
uint32_t flags)
{
struct kgsl_iommu *iommu = mmu->priv;
int temp;
int i;
- unsigned int pt_base = kgsl_iommu_pt_get_base_addr(
- mmu->hwpagetable);
+ unsigned int pt_base = kgsl_iommu_get_pt_base_addr(mmu,
+ mmu->hwpagetable);
unsigned int pt_val;
if (kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER)) {
KGSL_DRV_ERR(mmu->device, "Failed to enable iommu clocks\n");
return;
}
-
- pt_base &= (KGSL_IOMMU_TTBR0_PA_MASK << KGSL_IOMMU_TTBR0_PA_SHIFT);
+ /* Mask off the lsb of the pt base address since lsb will not change */
+ pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
-
- if (msm_soc_version_supports_iommu_v1())
- kgsl_idle(mmu->device);
-
-
+ /* Acquire GPU-CPU sync Lock here */
msm_iommu_lock();
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
if (!msm_soc_version_supports_iommu_v1())
kgsl_idle(mmu->device);
for (i = 0; i < iommu->unit_count; i++) {
+ /* get the lsb value which should not change when
+ * changing ttbr0 */
pt_val = kgsl_iommu_get_pt_lsb(mmu, i,
KGSL_IOMMU_CONTEXT_USER);
pt_val += pt_base;
- KGSL_IOMMU_SET_IOMMU_REG(
- iommu->iommu_units[i].reg_map.hostptr,
+ KGSL_IOMMU_SET_CTX_REG(iommu, (&iommu->iommu_units[i]),
KGSL_IOMMU_CONTEXT_USER, TTBR0, pt_val);
mb();
- temp = KGSL_IOMMU_GET_IOMMU_REG(
- iommu->iommu_units[i].reg_map.hostptr,
+ temp = KGSL_IOMMU_GET_CTX_REG(iommu,
+ (&iommu->iommu_units[i]),
KGSL_IOMMU_CONTEXT_USER, TTBR0);
}
}
-
+ /* Flush tlb */
if (flags & KGSL_MMUFLAGS_TLBFLUSH) {
for (i = 0; i < iommu->unit_count; i++) {
- KGSL_IOMMU_SET_IOMMU_REG(
- iommu->iommu_units[i].reg_map.hostptr,
- KGSL_IOMMU_CONTEXT_USER, CTX_TLBIALL,
- 1);
+ KGSL_IOMMU_SET_CTX_REG(iommu, (&iommu->iommu_units[i]),
+ KGSL_IOMMU_CONTEXT_USER, TLBIALL, 1);
mb();
}
}
-
+ /* Release GPU-CPU sync Lock here */
msm_iommu_unlock();
-
+ /* Disable smmu clock */
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
}
-static int kgsl_iommu_get_reg_map_desc(struct kgsl_mmu *mmu,
- void **reg_map_desc)
+/*
+ * kgsl_iommu_get_reg_gpuaddr - Returns the gpu address of an IOMMU register
+ * @mmu - Pointer to mmu structure
+ * @iommu_unit - The iommu unit for which the base address is requested
+ * @ctx_id - The context ID of the IOMMU ctx
+ * @reg - The register for which the address is required
+ *
+ * Return - the gpu address of the requested context register, or the base
+ * gpu address of the IOMMU unit's register map when reg is KGSL_IOMMU_GLOBAL_BASE
+ */
+static unsigned int kgsl_iommu_get_reg_gpuaddr(struct kgsl_mmu *mmu,
+ int iommu_unit, int ctx_id, int reg)
{
struct kgsl_iommu *iommu = mmu->priv;
- void **reg_desc_ptr;
- int i;
- reg_desc_ptr = kmalloc(iommu->unit_count *
- sizeof(struct kgsl_memdesc *), GFP_KERNEL);
- if (!reg_desc_ptr) {
- KGSL_CORE_ERR("Failed to kmalloc(%d)\n",
- iommu->unit_count * sizeof(struct kgsl_memdesc *));
- return -ENOMEM;
- }
+ if (KGSL_IOMMU_GLOBAL_BASE == reg)
+ return iommu->iommu_units[iommu_unit].reg_map.gpuaddr;
+ else
+ return iommu->iommu_units[iommu_unit].reg_map.gpuaddr +
+ iommu->iommu_reg_list[reg].reg_offset +
+ (ctx_id << KGSL_IOMMU_CTX_SHIFT) + iommu->ctx_offset;
+}
- for (i = 0; i < iommu->unit_count; i++)
- reg_desc_ptr[i] = &(iommu->iommu_units[i].reg_map);
-
- *reg_map_desc = reg_desc_ptr;
- return i;
+static int kgsl_iommu_get_num_iommu_units(struct kgsl_mmu *mmu)
+{
+ struct kgsl_iommu *iommu = mmu->priv;
+ return iommu->unit_count;
}
struct kgsl_mmu_ops iommu_ops = {
@@ -1026,7 +1353,10 @@
.mmu_enable_clk = kgsl_iommu_enable_clk,
.mmu_disable_clk_on_ts = kgsl_iommu_disable_clk_on_ts,
.mmu_get_pt_lsb = kgsl_iommu_get_pt_lsb,
- .mmu_get_reg_map_desc = kgsl_iommu_get_reg_map_desc,
+ .mmu_get_reg_gpuaddr = kgsl_iommu_get_reg_gpuaddr,
+ .mmu_get_num_iommu_units = kgsl_iommu_get_num_iommu_units,
+ .mmu_pt_equal = kgsl_iommu_pt_equal,
+ .mmu_get_pt_base_addr = kgsl_iommu_get_pt_base_addr,
.mmu_sync_lock = kgsl_iommu_sync_lock,
.mmu_sync_unlock = kgsl_iommu_sync_unlock,
};
@@ -1036,6 +1366,4 @@
.mmu_unmap = kgsl_iommu_unmap,
.mmu_create_pagetable = kgsl_iommu_create_pagetable,
.mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable,
- .mmu_pt_equal = kgsl_iommu_pt_equal,
- .mmu_pt_get_base_addr = kgsl_iommu_pt_get_base_addr,
};
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index 3389f08..4507700 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -15,35 +15,72 @@
#include <mach/iommu.h>
-#define KGSL_IOMMU_TTBR0 0x10
-#define KGSL_IOMMU_TTBR1 0x14
-#define KGSL_IOMMU_FSR 0x20
+#define KGSL_IOMMU_CTX_OFFSET_V1 0
+#define KGSL_IOMMU_CTX_OFFSET_V2 0x8000
+#define KGSL_IOMMU_CTX_SHIFT 12
-#define KGSL_IOMMU_TTBR0_PA_MASK 0x0003FFFF
-#define KGSL_IOMMU_TTBR0_PA_SHIFT 14
-#define KGSL_IOMMU_CTX_TLBIALL 0x800
-#define KGSL_IOMMU_CTX_SHIFT 12
+enum kgsl_iommu_reg_map {
+ KGSL_IOMMU_GLOBAL_BASE = 0,
+ KGSL_IOMMU_CTX_TTBR0,
+ KGSL_IOMMU_CTX_TTBR1,
+ KGSL_IOMMU_CTX_FSR,
+ KGSL_IOMMU_CTX_TLBIALL,
+ KGSL_IOMMU_CTX_RESUME,
+ KGSL_IOMMU_REG_MAX
+};
+struct kgsl_iommu_register_list {
+ unsigned int reg_offset;
+ unsigned int reg_mask;
+ unsigned int reg_shift;
+};
+
+/*
+ * Max number of iommu units that the gpu core can have
+ * On APQ8064, KGSL can control a maximum of 2 IOMMU units.
+ */
#define KGSL_IOMMU_MAX_UNITS 2
+/* Max number of iommu contexts per IOMMU unit */
#define KGSL_IOMMU_MAX_DEVS_PER_UNIT 2
-#define KGSL_IOMMU_SET_IOMMU_REG(base_addr, ctx, REG, val) \
- writel_relaxed(val, base_addr + \
- (ctx << KGSL_IOMMU_CTX_SHIFT) + \
- KGSL_IOMMU_##REG)
+/* Macros to read/write IOMMU registers */
+#define KGSL_IOMMU_SET_CTX_REG(iommu, iommu_unit, ctx, REG, val) \
+ writel_relaxed(val, \
+ iommu_unit->reg_map.hostptr + \
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_##REG].reg_offset +\
+ (ctx << KGSL_IOMMU_CTX_SHIFT) + \
+ iommu->ctx_offset)
-#define KGSL_IOMMU_GET_IOMMU_REG(base_addr, ctx, REG) \
- readl_relaxed(base_addr + \
- (ctx << KGSL_IOMMU_CTX_SHIFT) + \
- KGSL_IOMMU_##REG)
+#define KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit, ctx, REG) \
+ readl_relaxed( \
+ iommu_unit->reg_map.hostptr + \
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_##REG].reg_offset +\
+ (ctx << KGSL_IOMMU_CTX_SHIFT) + \
+ iommu->ctx_offset)
-#define KGSL_IOMMMU_PT_LSB(pt_val) \
- (pt_val & ~(KGSL_IOMMU_TTBR0_PA_MASK << \
- KGSL_IOMMU_TTBR0_PA_SHIFT))
+/* Gets the lsb value of pagetable */
+#define KGSL_IOMMMU_PT_LSB(iommu, pt_val) \
+ (pt_val & \
+ ~(iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask << \
+ iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift))
+/* offset at which a nop command is placed in setstate_memory */
#define KGSL_IOMMU_SETSTATE_NOP_OFFSET 1024
+/*
+ * struct kgsl_iommu_device - Structure holding data about iommu contexts
+ * @dev: Device pointer to iommu context
+ * @attached: Indicates whether this iommu context is presently attached to
+ * a pagetable/domain or not
+ * @pt_lsb: The LSB of IOMMU_TTBR0 register which is the pagetable
+ * register
+ * @ctx_id: This iommu unit's context id. It can be either 0 or 1
+ * @clk_enabled: If set indicates that iommu clocks of this iommu context
+ * are on, else the clocks are off
+ * @fault: Flag when set indicates that this iommu device has caused a page
+ * fault
+ */
struct kgsl_iommu_device {
struct device *dev;
bool attached;
@@ -51,25 +88,62 @@
enum kgsl_iommu_context_id ctx_id;
bool clk_enabled;
struct kgsl_device *kgsldev;
+ int fault;
};
+/*
+ * struct kgsl_iommu_unit - Structure holding data about iommu units. An IOMMU
+ * unit is basically a separate IOMMU h/w block with its own IOMMU contexts
+ * @dev: Pointer to array of struct kgsl_iommu_device which has information
+ * about the IOMMU contexts under this IOMMU unit
+ * @dev_count: Number of IOMMU contexts that are valid in the previous field
+ * @reg_map: Memory descriptor which holds the mapped address of this IOMMU
+ * unit's register range
+ */
struct kgsl_iommu_unit {
struct kgsl_iommu_device dev[KGSL_IOMMU_MAX_DEVS_PER_UNIT];
unsigned int dev_count;
struct kgsl_memdesc reg_map;
};
+/*
+ * struct kgsl_iommu - Structure holding iommu data for kgsl driver
+ * @dev: Array of kgsl_iommu_device which contain information about
+ * iommu contexts owned by graphics cores
+ * @unit_count: Number of IOMMU units that are available for this
+ * instance of the IOMMU driver
+ * @iommu_last_cmd_ts: The timestamp of last command submitted that
+ * accesses iommu registers
+ * @clk_event_queued: Indicates whether an event to disable clocks
+ * is already queued or not
+ * @device: Pointer to kgsl device
+ * @ctx_offset: The context offset to be added to base address when
+ * accessing IOMMU registers
+ * @iommu_reg_list: List of IOMMU registers { offset, map, shift } array
+ * @sync_lock_vars: Pointer to the IOMMU spinlock for serializing access to the
+ * IOMMU registers
+ * @sync_lock_desc: GPU Memory descriptor for the memory containing the
+ * spinlocks
+ * @sync_lock_initialized: True if the sync_lock feature is enabled
+ */
struct kgsl_iommu {
struct kgsl_iommu_unit iommu_units[KGSL_IOMMU_MAX_UNITS];
unsigned int unit_count;
unsigned int iommu_last_cmd_ts;
bool clk_event_queued;
struct kgsl_device *device;
+ unsigned int ctx_offset;
+ struct kgsl_iommu_register_list *iommu_reg_list;
struct remote_iommu_petersons_spinlock *sync_lock_vars;
struct kgsl_memdesc sync_lock_desc;
bool sync_lock_initialized;
};
+/*
+ * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver
+ * @domain: Pointer to the iommu domain that contains the iommu pagetable
+ * @iommu: Pointer to iommu structure
+ */
struct kgsl_iommu_pt {
struct iommu_domain *domain;
struct kgsl_iommu *iommu;
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
index 9c6e317..83d14f7 100644
--- a/drivers/gpu/msm/kgsl_log.h
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2008-2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -103,8 +103,19 @@
#define KGSL_PWR_CRIT(_dev, fmt, args...) \
KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args)
+#define KGSL_FT_INFO(_dev, fmt, args...) \
+KGSL_LOG_INFO(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_WARN(_dev, fmt, args...) \
+KGSL_LOG_WARN(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_ERR(_dev, fmt, args...) \
+KGSL_LOG_ERR(_dev->dev, _dev->ft_log, fmt, ##args)
+#define KGSL_FT_CRIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT(_dev->dev, _dev->ft_log, fmt, ##args)
+
+/* Core error messages - these are for core KGSL functions that have
+ no device associated with them (such as memory) */
#define KGSL_CORE_ERR(fmt, args...) \
pr_err("kgsl: %s: " fmt, __func__, ##args)
-#endif
+#endif /* __KGSL_LOG_H */
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index df74c11..d1f58c4 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -18,13 +18,14 @@
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/iommu.h>
+#include <mach/iommu.h>
#include <mach/socinfo.h>
#include "kgsl.h"
#include "kgsl_mmu.h"
#include "kgsl_device.h"
#include "kgsl_sharedmem.h"
-#include "adreno_postmortem.h"
+#include "adreno.h"
#define KGSL_MMU_ALIGN_SHIFT 13
#define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
@@ -36,7 +37,7 @@
static int kgsl_cleanup_pt(struct kgsl_pagetable *pt)
{
int i;
-
+ /* For IOMMU, only unmap the global structures from the global pt */
if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
(KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
(KGSL_MMU_GLOBAL_PT != pt->name) &&
@@ -56,7 +57,7 @@
int i = 0;
int status = 0;
-
+ /* For IOMMU, only map the global structures to the global pt */
if ((KGSL_MMU_TYPE_NONE != kgsl_mmu_type) &&
(KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type) &&
(KGSL_MMU_GLOBAL_PT != pt->name) &&
@@ -311,29 +312,31 @@
unsigned int kgsl_mmu_get_ptsize(void)
{
+ /*
+ * For IOMMU, we could do up to 4G virtual range if we wanted to, but
+ * it makes more sense to return a smaller range and leave the rest of
+ * the virtual range for future improvements
+ */
if (KGSL_MMU_TYPE_GPU == kgsl_mmu_type)
return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE;
else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_type)
-#ifdef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
- return CONFIG_MSM_KGSL_PAGE_TABLE_SIZE_FOR_IOMMU;
-#else
- return SZ_2G - KGSL_PAGETABLE_BASE;
-#endif
-
+ return SZ_2G - KGSL_PAGETABLE_BASE;
else
return 0;
}
int
-kgsl_mmu_get_ptname_from_ptbase(unsigned int pt_base)
+kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu, unsigned int pt_base)
{
struct kgsl_pagetable *pt;
int ptid = -1;
+ if (!mmu->mmu_ops || !mmu->mmu_ops->mmu_pt_equal)
+ return KGSL_MMU_GLOBAL_PT;
spin_lock(&kgsl_driver.ptlock);
list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
- if (pt->pt_ops->mmu_pt_equal(pt, pt_base)) {
+ if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
ptid = (int) pt->name;
break;
}
@@ -344,6 +347,35 @@
}
EXPORT_SYMBOL(kgsl_mmu_get_ptname_from_ptbase);
+unsigned int
+kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, unsigned int pt_base,
+ unsigned int addr)
+{
+ struct kgsl_pagetable *pt;
+ unsigned int ret = 0;
+
+ if (!mmu->mmu_ops || !mmu->mmu_ops->mmu_pt_equal)
+ return KGSL_MMU_GLOBAL_PT;
+ spin_lock(&kgsl_driver.ptlock);
+ list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
+ if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
+ if ((addr & (PAGE_SIZE-1)) == pt->fault_addr) {
+ ret = 1;
+ break;
+ } else {
+ pt->fault_addr = (addr & (PAGE_SIZE-1));
+ ret = 0;
+ break;
+ }
+
+ }
+ }
+ spin_unlock(&kgsl_driver.ptlock);
+
+ return ret;
+}
+EXPORT_SYMBOL(kgsl_mmu_log_fault_addr);
+
int kgsl_mmu_init(struct kgsl_device *device)
{
int status = 0;
@@ -379,7 +411,7 @@
if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) {
kgsl_regwrite(device, MH_MMU_CONFIG, 0);
-
+ /* Setup gpuaddr of global mappings */
if (!mmu->setstate_memory.gpuaddr)
kgsl_setup_pt(NULL);
return 0;
@@ -395,6 +427,10 @@
kgsl_regread(device, MH_AXI_ERROR, ®);
pt_base = kgsl_mmu_get_current_ptbase(&device->mmu);
+ /*
+ * Read gpu virtual and physical addresses that
+ * caused the error from the debug data.
+ */
kgsl_regwrite(device, MH_DEBUG_CTRL, 44);
kgsl_regread(device, MH_DEBUG_DATA, &gpu_err);
kgsl_regwrite(device, MH_DEBUG_CTRL, 45);
@@ -445,7 +481,12 @@
pagetable->name = name;
pagetable->max_entries = KGSL_PAGETABLE_ENTRIES(ptsize);
+ pagetable->fault_addr = 0xFFFFFFFF;
+ /*
+ * create a separate kgsl pool for IOMMU, global mappings can be mapped
+ * just once from this pool of the defaultpagetable
+ */
if ((KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) &&
((KGSL_MMU_GLOBAL_PT == name) ||
(KGSL_MMU_PRIV_BANK_TABLE_NAME == name))) {
@@ -493,7 +534,7 @@
list_add(&pagetable->list, &kgsl_driver.pagetable_list);
spin_unlock_irqrestore(&kgsl_driver.ptlock, flags);
-
+ /* Create the sysfs entries */
pagetable_add_sysfs_objects(pagetable);
return pagetable;
@@ -521,6 +562,10 @@
#ifndef CONFIG_KGSL_PER_PROCESS_PAGE_TABLE
name = KGSL_MMU_GLOBAL_PT;
#endif
+ /* We presently do not support per-process pagetables for IOMMU-v2 */
+ if (!msm_soc_version_supports_iommu_v1())
+ name = KGSL_MMU_GLOBAL_PT;
+
pt = kgsl_get_pagetable(name);
if (pt == NULL)
@@ -539,6 +584,12 @@
uint32_t flags)
{
struct kgsl_device *device = mmu->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+ if (!(flags & (KGSL_MMUFLAGS_TLBFLUSH | KGSL_MMUFLAGS_PTUPDATE))
+ && !adreno_is_a2xx(adreno_dev))
+ return;
+
if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
return;
else if (device->ftbl->setstate)
@@ -551,11 +602,11 @@
void kgsl_mh_start(struct kgsl_device *device)
{
struct kgsl_mh *mh = &device->mh;
-
+ /* force mmu off for now */
kgsl_regwrite(device, MH_MMU_CONFIG, 0);
kgsl_idle(device);
-
+ /* define physical memory range accessible by the core */
kgsl_regwrite(device, MH_MMU_MPU_BASE, mh->mpu_base);
kgsl_regwrite(device, MH_MMU_MPU_END,
mh->mpu_base + mh->mpu_range);
@@ -569,13 +620,17 @@
kgsl_regwrite(device, MH_CLNT_INTF_CTRL_CONFIG2,
mh->mh_intf_cfg2);
+ /*
+ * Interrupts are enabled on a per-device level when
+ * kgsl_pwrctrl_irq() is called
+ */
}
static inline struct gen_pool *
_get_pool(struct kgsl_pagetable *pagetable, unsigned int flags)
{
if (pagetable->kgsl_pool &&
- (KGSL_MEMFLAGS_GLOBAL & flags))
+ (KGSL_MEMDESC_GLOBAL & flags))
return pagetable->kgsl_pool;
return pagetable->pool;
}
@@ -588,6 +643,7 @@
int ret;
struct gen_pool *pool;
int size;
+ int page_align = ilog2(PAGE_SIZE);
if (kgsl_mmu_type == KGSL_MMU_TYPE_NONE) {
if (memdesc->sglen == 1) {
@@ -609,10 +665,19 @@
size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
-
+ /* Allocate from kgsl pool if it exists for global mappings */
pool = _get_pool(pagetable, memdesc->priv);
- memdesc->gpuaddr = gen_pool_alloc(pool, size);
+ /* Allocate aligned virtual addresses for iommu. This allows
+ * more efficient pagetable entries if the physical memory
+ * is also aligned. Don't do this for GPUMMU, because
+ * the address space is so small.
+ */
+ if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype() &&
+ kgsl_memdesc_get_align(memdesc) > 0)
+ page_align = kgsl_memdesc_get_align(memdesc);
+
+ memdesc->gpuaddr = gen_pool_alloc_aligned(pool, size, page_align);
if (memdesc->gpuaddr == 0) {
KGSL_CORE_ERR("gen_pool_alloc(%d) failed from pool: %s\n",
size,
@@ -634,7 +699,7 @@
if (ret)
goto err_free_gpuaddr;
-
+ /* Keep track of the statistics for the sysfs files */
KGSL_STATS_ADD(1, pagetable->stats.entries,
pagetable->stats.max_entries);
@@ -660,6 +725,8 @@
{
struct gen_pool *pool;
int size;
+ unsigned int start_addr = 0;
+ unsigned int end_addr = 0;
if (memdesc->size == 0 || memdesc->gpuaddr == 0)
return 0;
@@ -671,13 +738,22 @@
size = kgsl_sg_size(memdesc->sg, memdesc->sglen);
+ start_addr = memdesc->gpuaddr;
+ end_addr = (memdesc->gpuaddr + size);
+
if (KGSL_MMU_TYPE_IOMMU != kgsl_mmu_get_mmutype())
spin_lock(&pagetable->lock);
pagetable->pt_ops->mmu_unmap(pagetable->priv, memdesc,
&pagetable->tlb_flags);
+
+ /* If the buffer being unmapped contains the recorded fault addr, clear it */
+ if ((pagetable->fault_addr >= start_addr) &&
+ (pagetable->fault_addr < end_addr))
+ pagetable->fault_addr = 0;
+
if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
spin_lock(&pagetable->lock);
-
+ /* Remove the statistics */
pagetable->stats.entries--;
pagetable->stats.mapped -= size;
@@ -686,7 +762,11 @@
pool = _get_pool(pagetable, memdesc->priv);
gen_pool_free(pool, memdesc->gpuaddr, size);
- if (!(memdesc->priv & KGSL_MEMFLAGS_GLOBAL))
+ /*
+ * Don't clear the gpuaddr on global mappings because they
+ * may be in use by other pagetables
+ */
+ if (!(memdesc->priv & KGSL_MEMDESC_GLOBAL))
memdesc->gpuaddr = 0;
return 0;
}
@@ -702,18 +782,18 @@
KGSL_CORE_ERR("invalid memdesc\n");
goto error;
}
-
+ /* Not all global mappings are needed for all MMU types */
if (!memdesc->size)
return 0;
gpuaddr = memdesc->gpuaddr;
- memdesc->priv |= KGSL_MEMFLAGS_GLOBAL;
+ memdesc->priv |= KGSL_MEMDESC_GLOBAL;
result = kgsl_mmu_map(pagetable, memdesc, protflags);
if (result)
goto error;
-
+ /* global mappings must have the same gpu address in all pagetables */
if (gpuaddr && gpuaddr != memdesc->gpuaddr) {
KGSL_CORE_ERR("pt %p addr mismatch phys 0x%08x"
"gpu 0x%0x 0x%08x", pagetable, memdesc->physaddr,
@@ -792,15 +872,13 @@
void kgsl_mmu_set_mmutype(char *mmutype)
{
-
+ /* Set the default MMU - GPU on <=8960 and nothing on >= 8064 */
kgsl_mmu_type =
cpu_is_apq8064() ? KGSL_MMU_TYPE_NONE : KGSL_MMU_TYPE_GPU;
-#ifndef CONFIG_MSM_KGSL_DEFAULT_GPUMMU
-
+ /* Use the IOMMU if it is found */
if (iommu_present(&platform_bus_type))
kgsl_mmu_type = KGSL_MMU_TYPE_IOMMU;
-#endif
if (mmutype && !strncmp(mmutype, "gpummu", 6))
kgsl_mmu_type = KGSL_MMU_TYPE_GPU;
@@ -811,3 +889,13 @@
kgsl_mmu_type = KGSL_MMU_TYPE_NONE;
}
EXPORT_SYMBOL(kgsl_mmu_set_mmutype);
+
+int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr)
+{
+ if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
+ return 1;
+ return ((gpuaddr >= KGSL_PAGETABLE_BASE) &&
+ (gpuaddr < (KGSL_PAGETABLE_BASE + kgsl_mmu_get_ptsize())));
+}
+EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range);
+
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index d8713d3..377f342 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,6 +13,10 @@
#ifndef __KGSL_MMU_H
#define __KGSL_MMU_H
+/*
+ * These defines control the split between ttbr1 and ttbr0 pagetables of IOMMU
+ * and what ranges of memory we map to them
+ */
#define KGSL_IOMMU_GLOBAL_MEM_BASE 0xC0000000
#define KGSL_IOMMU_GLOBAL_MEM_SIZE SZ_4M
#define KGSL_IOMMU_TTBR1_SPLIT 2
@@ -20,6 +24,9 @@
#define KGSL_MMU_ALIGN_SHIFT 13
#define KGSL_MMU_ALIGN_MASK (~((1 << KGSL_MMU_ALIGN_SHIFT) - 1))
+/* Identifier for the global page table */
+/* Per process page tables will probably pass in the thread group
+ as an identifier */
#define KGSL_MMU_GLOBAL_PT 0
#define KGSL_MMU_PRIV_BANK_TABLE_NAME 0xFFFFFFFF
@@ -31,6 +38,10 @@
#define GSL_PT_PAGE_RV 0x00000002
#define GSL_PT_PAGE_DIRTY 0x00000004
+/* MMU registers - the register locations for all cores are the
+ same. The method for getting to those locations differs between
+ 2D and 3D, but the 2D and 3D register functions do that magic
+ for us */
#define MH_MMU_CONFIG 0x0040
#define MH_MMU_VA_RANGE 0x0041
@@ -52,6 +63,7 @@
#define MH_CLNT_INTF_CTRL_CONFIG1 0x0A54
#define MH_CLNT_INTF_CTRL_CONFIG2 0x0A55
+/* MH_MMU_CONFIG bit definitions */
#define MH_MMU_CONFIG__RB_W_CLNT_BEHAVIOR__SHIFT 0x00000004
#define MH_MMU_CONFIG__CP_W_CLNT_BEHAVIOR__SHIFT 0x00000006
@@ -65,6 +77,7 @@
#define MH_MMU_CONFIG__TC_R_CLNT_BEHAVIOR__SHIFT 0x00000016
#define MH_MMU_CONFIG__PA_W_CLNT_BEHAVIOR__SHIFT 0x00000018
+/* MMU Flags */
#define KGSL_MMUFLAGS_TLBFLUSH 0x10000000
#define KGSL_MMUFLAGS_PTUPDATE 0x20000000
@@ -101,6 +114,7 @@
} stats;
const struct kgsl_mmu_pt_ops *pt_ops;
unsigned int tlb_flags;
+ unsigned int fault_addr;
void *priv;
};
@@ -126,8 +140,15 @@
int (*mmu_get_pt_lsb)(struct kgsl_mmu *mmu,
unsigned int unit_id,
enum kgsl_iommu_context_id ctx_id);
- int (*mmu_get_reg_map_desc)(struct kgsl_mmu *mmu,
- void **reg_map_desc);
+ unsigned int (*mmu_get_reg_gpuaddr)(struct kgsl_mmu *mmu,
+ int iommu_unit_num, int ctx_id, int reg);
+ int (*mmu_get_num_iommu_units)(struct kgsl_mmu *mmu);
+ int (*mmu_pt_equal) (struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt,
+ unsigned int pt_base);
+ unsigned int (*mmu_get_pt_base_addr)
+ (struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt);
unsigned int (*mmu_sync_lock)
(struct kgsl_mmu *mmu,
unsigned int *cmds);
@@ -146,10 +167,6 @@
unsigned int *tlb_flags);
void *(*mmu_create_pagetable) (void);
void (*mmu_destroy_pagetable) (void *pt);
- int (*mmu_pt_equal) (struct kgsl_pagetable *pt,
- unsigned int pt_base);
- unsigned int (*mmu_pt_get_base_addr)
- (struct kgsl_pagetable *pt);
};
#define KGSL_MMU_FLAGS_IOMMU_SYNC BIT(31)
@@ -160,13 +177,14 @@
struct kgsl_device *device;
unsigned int config;
struct kgsl_memdesc setstate_memory;
-
+ /* current page table object being used by device mmu */
struct kgsl_pagetable *defaultpagetable;
-
+ /* pagetable object used for priv bank of IOMMU */
struct kgsl_pagetable *priv_bank_table;
struct kgsl_pagetable *hwpagetable;
const struct kgsl_mmu_ops *mmu_ops;
void *priv;
+ int fault;
};
#include "kgsl_gpummu.h"
@@ -191,7 +209,10 @@
unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id,
uint32_t flags);
-int kgsl_mmu_get_ptname_from_ptbase(unsigned int pt_base);
+int kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu,
+ unsigned int pt_base);
+unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu,
+ unsigned int pt_base, unsigned int addr);
int kgsl_mmu_pt_get_flags(struct kgsl_pagetable *pt,
enum kgsl_deviceid id);
void kgsl_mmu_ptpool_destroy(void *ptpool);
@@ -200,7 +221,13 @@
void kgsl_mmu_set_mmutype(char *mmutype);
enum kgsl_mmutype kgsl_mmu_get_mmutype(void);
unsigned int kgsl_mmu_get_ptsize(void);
+int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr);
+/*
+ * Static inline functions of MMU that simply call the SMMU specific
+ * function using a function pointer. These functions can be thought
+ * of as wrappers around the actual function
+ */
static inline unsigned int kgsl_mmu_get_current_ptbase(struct kgsl_mmu *mmu)
{
@@ -231,28 +258,21 @@
mmu->mmu_ops->mmu_stop(mmu);
}
-static inline int kgsl_mmu_pt_equal(struct kgsl_pagetable *pt,
+static inline int kgsl_mmu_pt_equal(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt,
unsigned int pt_base)
{
- if (KGSL_MMU_TYPE_NONE == kgsl_mmu_get_mmutype())
+ if (mmu->mmu_ops && mmu->mmu_ops->mmu_pt_equal)
+ return mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base);
+ else
return 1;
- else
- return pt->pt_ops->mmu_pt_equal(pt, pt_base);
}
-static inline unsigned int kgsl_mmu_pt_get_base_addr(struct kgsl_pagetable *pt)
+static inline unsigned int kgsl_mmu_get_pt_base_addr(struct kgsl_mmu *mmu,
+ struct kgsl_pagetable *pt)
{
- if (KGSL_MMU_TYPE_NONE == kgsl_mmu_get_mmutype())
- return 0;
- else
- return pt->pt_ops->mmu_pt_get_base_addr(pt);
-}
-
-static inline int kgsl_mmu_get_reg_map_desc(struct kgsl_mmu *mmu,
- void **reg_map_desc)
-{
- if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_reg_map_desc)
- return mmu->mmu_ops->mmu_get_reg_map_desc(mmu, reg_map_desc);
+ if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_pt_base_addr)
+ return mmu->mmu_ops->mmu_get_pt_base_addr(mmu, pt);
else
return 0;
}
@@ -285,7 +305,7 @@
static inline unsigned int kgsl_mmu_get_int_mask(void)
{
-
+ /* Don't enable gpummu interrupts if iommu is enabled */
if (KGSL_MMU_TYPE_GPU == kgsl_mmu_get_mmutype())
return KGSL_MMU_INT_MASK;
else
@@ -293,10 +313,23 @@
MH_INTERRUPT_MASK__AXI_WRITE_ERROR);
}
-static inline int kgsl_mmu_gpuaddr_in_range(unsigned int gpuaddr)
+static inline unsigned int kgsl_mmu_get_reg_gpuaddr(struct kgsl_mmu *mmu,
+ int iommu_unit_num,
+ int ctx_id, int reg)
{
- return ((gpuaddr >= KGSL_PAGETABLE_BASE) &&
- (gpuaddr < (KGSL_PAGETABLE_BASE + kgsl_mmu_get_ptsize())));
+ if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_reg_gpuaddr)
+ return mmu->mmu_ops->mmu_get_reg_gpuaddr(mmu, iommu_unit_num,
+ ctx_id, reg);
+ else
+ return 0;
+}
+
+static inline int kgsl_mmu_get_num_iommu_units(struct kgsl_mmu *mmu)
+{
+ if (mmu->mmu_ops && mmu->mmu_ops->mmu_get_num_iommu_units)
+ return mmu->mmu_ops->mmu_get_num_iommu_units(mmu);
+ else
+ return 0;
}
static inline int kgsl_mmu_sync_lock(struct kgsl_mmu *mmu,
@@ -319,4 +352,4 @@
return 0;
}
-#endif
+#endif /* __KGSL_MMU_H */
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 3ed1ec8..d489119 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -17,7 +17,7 @@
#include <linux/pm_runtime.h>
#include <mach/msm_iomap.h>
#include <mach/msm_bus.h>
-#include <linux/fb.h>
+#include <linux/ktime.h>
#include "kgsl.h"
#include "kgsl_pwrscale.h"
@@ -38,16 +38,6 @@
uint map;
};
-struct gpufreq_stats {
- unsigned long long last_time;
- unsigned int last_index;
- unsigned int cur_index;
-};
-
-static spinlock_t gpufreq_stats_lock;
-static unsigned long long gputime_in_state[KGSL_MAX_PWRLEVELS] = {0};
-struct gpufreq_stats gpufreq_stat;
-
struct clk_pair clks[KGSL_MAX_CLKS] = {
{
.name = "src_clk",
@@ -71,72 +61,328 @@
},
};
-static int gpufreq_stats_update(unsigned int update_time_only, unsigned int last_index, unsigned int cur_index)
+/* Update the elapsed time at a particular clock level
+ * if the device is active (on_time = true). Otherwise
+ * store it as sleep time.
+ */
+static void update_clk_statistics(struct kgsl_device *device,
+ bool on_time)
{
- unsigned long long cur_time;
- spin_lock(&gpufreq_stats_lock);
- cur_time = get_jiffies_64();
- if (update_time_only)
- goto done;
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+ struct kgsl_clk_stats *clkstats = &pwr->clk_stats;
+ ktime_t elapsed;
+ int elapsed_us;
+ if (clkstats->start.tv64 == 0)
+ clkstats->start = ktime_get();
+ clkstats->stop = ktime_get();
+ elapsed = ktime_sub(clkstats->stop, clkstats->start);
+ elapsed_us = ktime_to_us(elapsed);
+ clkstats->elapsed += elapsed_us;
+ if (on_time)
+ clkstats->clock_time[pwr->active_pwrlevel] += elapsed_us;
+ else
+ clkstats->clock_time[pwr->num_pwrlevels - 1] += elapsed_us;
+ clkstats->start = ktime_get();
+}
- if (last_index < KGSL_MAX_PWRLEVELS)
- gputime_in_state[last_index] = gputime_in_state[last_index] + cur_time - gpufreq_stat.last_time;
+/*
+ * Given a requested power level do bounds checking on the constraints and
+ * return the nearest possible level
+ */
-done:
+static inline int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level)
+{
+ int max_pwrlevel = max_t(int, pwr->thermal_pwrlevel, pwr->max_pwrlevel);
+ int min_pwrlevel = max_t(int, pwr->thermal_pwrlevel, pwr->min_pwrlevel);
- gpufreq_stat.cur_index = cur_index;
- gpufreq_stat.last_index = last_index;
- gpufreq_stat.last_time = cur_time;
+ if (level < max_pwrlevel)
+ return max_pwrlevel;
+ if (level > min_pwrlevel)
+ return min_pwrlevel;
- spin_unlock(&gpufreq_stats_lock);
- return 0;
+ return level;
}
void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device,
unsigned int new_level)
{
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
- if (new_level < (pwr->num_pwrlevels - 1) &&
- new_level >= pwr->thermal_pwrlevel &&
- new_level != pwr->active_pwrlevel) {
- struct kgsl_pwrlevel *pwrlevel = &pwr->pwrlevels[new_level];
- int diff = new_level - pwr->active_pwrlevel;
- int d = (diff > 0) ? 1 : -1;
- int level = pwr->active_pwrlevel;
- pwr->active_pwrlevel = new_level;
- if ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags)) ||
- (device->state == KGSL_STATE_NAP)) {
- if (pwr->idle_needed == true)
- device->ftbl->idle(device);
- while (level != new_level) {
- level += d;
- clk_set_rate(pwr->grp_clks[0],
- pwr->pwrlevels[level].gpu_freq);
- }
+ struct kgsl_pwrlevel *pwrlevel;
+ int delta;
+ int level;
+
+ /* Adjust the power level to the current constraints */
+ new_level = _adjust_pwrlevel(pwr, new_level);
+
+ if (new_level == pwr->active_pwrlevel)
+ return;
+
+ delta = new_level < pwr->active_pwrlevel ? -1 : 1;
+
+ update_clk_statistics(device, true);
+
+ level = pwr->active_pwrlevel;
+
+ /*
+ * Set the active powerlevel first in case the clocks are off - if we
+ * don't do this then the pwrlevel change won't take effect when the
+ * clocks come back
+ */
+
+ pwr->active_pwrlevel = new_level;
+
+ if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) ||
+ (device->state == KGSL_STATE_NAP)) {
+
+ /*
+ * On some platforms, instability is caused on
+ * changing clock freq when the core is busy.
+ * Idle the gpu core before changing the clock freq.
+ */
+
+ if (pwr->idle_needed == true)
+ device->ftbl->idle(device);
+
+ /*
+ * Don't shift by more than one level at a time to
+ * avoid glitches.
+ */
+
+ while (level != new_level) {
+ level += delta;
+
+ clk_set_rate(pwr->grp_clks[0],
+ pwr->pwrlevels[level].gpu_freq);
}
- if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
- if (pwr->pcl) {
- msm_bus_scale_client_update_request(pwr->pcl,
- pwrlevel->bus_freq);
- } else if (pwr->ebi1_clk)
- clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
- }
- trace_kgsl_pwrlevel(device, pwr->active_pwrlevel,
- pwrlevel->gpu_freq);
}
+
+ pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
+
+ if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)) {
+
+ if (pwr->pcl)
+ msm_bus_scale_client_update_request(pwr->pcl,
+ pwrlevel->bus_freq);
+ else if (pwr->ebi1_clk)
+ clk_set_rate(pwr->ebi1_clk, pwrlevel->bus_freq);
+ }
+
+ trace_kgsl_pwrlevel(device, pwr->active_pwrlevel, pwrlevel->gpu_freq);
}
+
EXPORT_SYMBOL(kgsl_pwrctrl_pwrlevel_change);
-static int __gpuclk_store(int max, struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{ int ret, i, delta = 5000000;
- unsigned long val;
+static int kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
struct kgsl_device *device = kgsl_device_from_dev(dev);
struct kgsl_pwrctrl *pwr;
+ int ret, level;
if (device == NULL)
return 0;
+
+ pwr = &device->pwrctrl;
+
+ ret = sscanf(buf, "%d", &level);
+ if (ret != 1)
+ return count;
+
+ if (level < 0)
+ return count;
+
+ mutex_lock(&device->mutex);
+
+ if (level > pwr->num_pwrlevels - 2)
+ level = pwr->num_pwrlevels - 2;
+
+ pwr->thermal_pwrlevel = level;
+
+ /*
+ * If there is no power policy, set the clock to the requested thermal
+ * level - if thermal now happens to be higher than max, then that will
+ * be limited by the pwrlevel change function. Otherwise, if there is
+ * a policy, only change the active clock if it is higher than the new
+ * thermal level
+ */
+
+ if (device->pwrscale.policy == NULL ||
+ pwr->thermal_pwrlevel > pwr->active_pwrlevel)
+ kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel);
+
+ mutex_unlock(&device->mutex);
+
+ return count;
+}
+
+static int kgsl_pwrctrl_thermal_pwrlevel_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ if (device == NULL)
+ return 0;
+ pwr = &device->pwrctrl;
+ return snprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel);
+}
+
+static int kgsl_pwrctrl_max_pwrlevel_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ int ret, level, max_level;
+
+ if (device == NULL)
+ return 0;
+
+ pwr = &device->pwrctrl;
+
+ ret = sscanf(buf, "%d", &level);
+ if (ret != 1)
+ return count;
+
+ /* If the user specifies a negative number, then don't change anything */
+ if (level < 0)
+ return count;
+
+ mutex_lock(&device->mutex);
+
+ /* You can't set a maximum power level lower than the minimum */
+ if (level > pwr->min_pwrlevel)
+ level = pwr->min_pwrlevel;
+
+ pwr->max_pwrlevel = level;
+
+
+ max_level = max_t(int, pwr->thermal_pwrlevel, pwr->max_pwrlevel);
+
+ /*
+ * If there is no policy then move to max by default. Otherwise only
+ * move to max if the current level happens to be higher than the new max
+ */
+
+ if (device->pwrscale.policy == NULL ||
+ (max_level > pwr->active_pwrlevel))
+ kgsl_pwrctrl_pwrlevel_change(device, max_level);
+
+ mutex_unlock(&device->mutex);
+
+ return count;
+}
+
+static int kgsl_pwrctrl_max_pwrlevel_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ if (device == NULL)
+ return 0;
+ pwr = &device->pwrctrl;
+ return snprintf(buf, PAGE_SIZE, "%d\n", pwr->max_pwrlevel);
+}
+
+static int kgsl_pwrctrl_min_pwrlevel_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ int ret, level, min_level;
+
+ if (device == NULL)
+ return 0;
+
+ pwr = &device->pwrctrl;
+
+ ret = sscanf(buf, "%d", &level);
+ if (ret != 1)
+ return count;
+
+ /* Don't do anything on obviously incorrect values */
+ if (level < 0)
+ return count;
+
+ mutex_lock(&device->mutex);
+ if (level > pwr->num_pwrlevels - 2)
+ level = pwr->num_pwrlevels - 2;
+
+ /* You can't set a minimum power level lower than the maximum */
+ if (level < pwr->max_pwrlevel)
+ level = pwr->max_pwrlevel;
+
+ pwr->min_pwrlevel = level;
+
+ min_level = max_t(int, pwr->thermal_pwrlevel, pwr->min_pwrlevel);
+
+ /* Only move the power level higher if minimum is higher than the
+ * current level
+ */
+
+ if (min_level < pwr->active_pwrlevel)
+ kgsl_pwrctrl_pwrlevel_change(device, min_level);
+
+ mutex_unlock(&device->mutex);
+
+ return count;
+}
+
+static int kgsl_pwrctrl_min_pwrlevel_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ if (device == NULL)
+ return 0;
+ pwr = &device->pwrctrl;
+ return snprintf(buf, PAGE_SIZE, "%d\n", pwr->min_pwrlevel);
+}
+
+static int kgsl_pwrctrl_num_pwrlevels_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ if (device == NULL)
+ return 0;
+ pwr = &device->pwrctrl;
+ return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1);
+}
+
+/* Given a GPU clock value, return the nearest powerlevel */
+
+static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock)
+{
+ int i;
+
+ for (i = 0; i < pwr->num_pwrlevels - 1; i++) {
+ if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000)
+ return i;
+ }
+
+ return -ERANGE;
+}
+
+static int kgsl_pwrctrl_max_gpuclk_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ unsigned long val;
+ int ret, level;
+
+ if (device == NULL)
+ return 0;
+
pwr = &device->pwrctrl;
ret = sscanf(buf, "%ld", &val);
@@ -144,40 +390,30 @@
return count;
mutex_lock(&device->mutex);
- for (i = 0; i < pwr->num_pwrlevels; i++) {
- if (abs(pwr->pwrlevels[i].gpu_freq - val) < delta) {
- if (max)
- pwr->thermal_pwrlevel = i;
- break;
- }
- }
-
- if (i == pwr->num_pwrlevels)
+ level = _get_nearest_pwrlevel(pwr, val);
+ if (level < 0)
goto done;
+ pwr->thermal_pwrlevel = level;
- if (pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq >
- pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq)
+ /*
+ * if the thermal limit is lower than the current setting,
+ * move the speed down immediately
+ */
+
+ if (pwr->thermal_pwrlevel > pwr->active_pwrlevel)
kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel);
- else if (!max)
- kgsl_pwrctrl_pwrlevel_change(device, i);
done:
mutex_unlock(&device->mutex);
return count;
}
-static int kgsl_pwrctrl_max_gpuclk_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- return __gpuclk_store(1, dev, attr, buf, count);
-}
-
static int kgsl_pwrctrl_max_gpuclk_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
+
struct kgsl_device *device = kgsl_device_from_dev(dev);
struct kgsl_pwrctrl *pwr;
if (device == NULL)
@@ -191,7 +427,27 @@
struct device_attribute *attr,
const char *buf, size_t count)
{
- return __gpuclk_store(0, dev, attr, buf, count);
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_pwrctrl *pwr;
+ unsigned long val;
+ int ret, level;
+
+ if (device == NULL)
+ return 0;
+
+ pwr = &device->pwrctrl;
+
+ ret = sscanf(buf, "%ld", &val);
+ if (ret != 1)
+ return count;
+
+ mutex_lock(&device->mutex);
+ level = _get_nearest_pwrlevel(pwr, val);
+ if (level >= 0)
+ kgsl_pwrctrl_pwrlevel_change(device, level);
+
+ mutex_unlock(&device->mutex);
+ return count;
}
static int kgsl_pwrctrl_gpuclk_show(struct device *dev,
@@ -277,7 +533,7 @@
mutex_lock(&device->mutex);
-
+ /* Let the timeout be requested in ms, but convert to jiffies. */
val /= div;
if (val >= org_interval_timeout)
pwr->interval_timeout = val;
@@ -304,20 +560,46 @@
{
int ret;
struct kgsl_device *device = kgsl_device_from_dev(dev);
- struct kgsl_busy *b = &device->pwrctrl.busy;
- ret = snprintf(buf, 17, "%7d %7d\n",
- b->on_time_old, b->time_old);
+ struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+ ret = snprintf(buf, PAGE_SIZE, "%7d %7d\n",
+ clkstats->on_time_old, clkstats->elapsed_old);
if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
- b->on_time_old = 0;
- b->time_old = 0;
+ clkstats->on_time_old = 0;
+ clkstats->elapsed_old = 0;
}
return ret;
}
+static int kgsl_pwrctrl_gputop_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ int ret;
+ struct kgsl_device *device = kgsl_device_from_dev(dev);
+ struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+ int i = 0;
+ char *ptr = buf;
+
+ ret = snprintf(buf, PAGE_SIZE, "%7d %7d ", clkstats->on_time_old,
+ clkstats->elapsed_old);
+ for (i = 0, ptr += ret; i < device->pwrctrl.num_pwrlevels;
+ i++, ptr += ret)
+ ret = snprintf(ptr, PAGE_SIZE, "%7d ",
+ clkstats->old_clock_time[i]);
+
+ if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
+ clkstats->on_time_old = 0;
+ clkstats->elapsed_old = 0;
+ for (i = 0; i < KGSL_MAX_PWRLEVELS ; i++)
+ clkstats->old_clock_time[i] = 0;
+ }
+ return (unsigned int) (ptr - buf);
+}
+
static int kgsl_pwrctrl_gpu_available_frequencies_show(
- struct device *dev,
- struct device_attribute *attr,
- char *buf)
+ struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
struct kgsl_device *device = kgsl_device_from_dev(dev);
struct kgsl_pwrctrl *pwr;
@@ -333,131 +615,31 @@
return num_chars;
}
-
-static int kgsl_pwrctrl_gpubusy_time_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- int ret;
- struct kgsl_device *device = kgsl_device_from_dev(dev);
- s64 system_time, busy_time;
-
- if(device == NULL)
- return 0;
-
- system_time = device->gputime.total;
- do_div(system_time, 1000);
- busy_time = device->gputime.busy;
- do_div(busy_time, 1000);
- ret = snprintf(buf, 63, "%lld %lld\n", system_time, busy_time);
-
- return ret;
-}
-
-static int kgsl_pwrctrl_gpubusy_time_in_state_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- int i;
- char* tmp = buf;
- struct kgsl_device *device = kgsl_device_from_dev(dev);
- struct platform_device *pdev = NULL;
- struct kgsl_device_platform_data *pdata = NULL;
- s64 system_time, busy_time;
-
- if (device == NULL)
- return 0;
-
- pdev = container_of(device->parentdev, struct platform_device, dev);
- if (pdev == NULL)
- return 0;
-
- pdata = pdev->dev.platform_data;
- if (pdata == NULL)
- return 0;
-
- for(i=0;i<pdata->num_levels;i++) {
- system_time = device->gputime_in_state[i].total;
- do_div(system_time, 1000);
- busy_time = device->gputime_in_state[i].busy;
- do_div(busy_time, 1000);
- tmp = (char*)( (int)tmp + snprintf(tmp, PAGE_SIZE - (int)(tmp-buf), "%d %lld %lld\n", pdata->pwrlevel[i].gpu_freq, system_time, busy_time));
- }
- return (ssize_t)(tmp - buf);
-}
-
-static int kgsl_pwrctrl_gputime_in_state_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- ssize_t len = 0;
- int i;
- struct kgsl_device *device = kgsl_device_from_dev(dev);
-
- if (device == NULL)
- return 0;
-
- if (test_bit(KGSL_PWRFLAGS_CLK_ON, &device->pwrctrl.power_flags) || (device->state == KGSL_STATE_NAP))
- gpufreq_stats_update(0, device->pwrctrl.active_pwrlevel, device->pwrctrl.active_pwrlevel);
-
- for (i = 0; i < device->pwrctrl.num_pwrlevels; i++) {
- len += sprintf(buf + len, "%u %llu\n", device->pwrctrl.pwrlevels[i].gpu_freq,
- (unsigned long long)jiffies_to_clock_t(gputime_in_state[i]));
- }
-
- return len;
-}
-
-static int kgsl_pwrctrl_init_pwrlevel_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t count)
-{
- char temp[20];
- unsigned long val;
- struct kgsl_device *device = kgsl_device_from_dev(dev);
- struct kgsl_pwrctrl *pwr;
- int rc;
-
- if (device == NULL)
- return 0;
- pwr = &device->pwrctrl;
-
- snprintf(temp, sizeof(temp), "%.*s",
- (int)min(count, sizeof(temp) - 1), buf);
- rc = strict_strtoul(temp, 0, &val);
- if (rc)
- return rc;
-
- mutex_lock(&device->mutex);
-
- if (val >=0 && val < pwr->num_pwrlevels - 1)
- pwr->default_pwrlevel = val;
-
- mutex_unlock(&device->mutex);
-
- return count;
-}
-
-static int kgsl_pwrctrl_init_pwrlevel_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
-{
- struct kgsl_device *device = kgsl_device_from_dev(dev);
- if (device == NULL)
- return 0;
- return snprintf(buf, PAGE_SIZE, "%d\n", device->pwrctrl.default_pwrlevel);
-}
-
DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show, kgsl_pwrctrl_gpuclk_store);
-DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show, kgsl_pwrctrl_max_gpuclk_store);
+DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show,
+ kgsl_pwrctrl_max_gpuclk_store);
DEVICE_ATTR(pwrnap, 0664, kgsl_pwrctrl_pwrnap_show, kgsl_pwrctrl_pwrnap_store);
-DEVICE_ATTR(idle_timer, 0644, kgsl_pwrctrl_idle_timer_show, kgsl_pwrctrl_idle_timer_store);
-DEVICE_ATTR(gputime_in_state, 0444, kgsl_pwrctrl_gputime_in_state_show, NULL);
-DEVICE_ATTR(gpubusy, 0644, kgsl_pwrctrl_gpubusy_show, NULL);
-DEVICE_ATTR(gpu_available_frequencies, 0444,kgsl_pwrctrl_gpu_available_frequencies_show,NULL);
-DEVICE_ATTR(gpubusy_time, 0644, kgsl_pwrctrl_gpubusy_time_show, NULL);
-DEVICE_ATTR(gpubusy_time_in_state, 0644, kgsl_pwrctrl_gpubusy_time_in_state_show, NULL);
-DEVICE_ATTR(init_pwrlevel, 0644, kgsl_pwrctrl_init_pwrlevel_show, kgsl_pwrctrl_init_pwrlevel_store);
+DEVICE_ATTR(idle_timer, 0644, kgsl_pwrctrl_idle_timer_show,
+ kgsl_pwrctrl_idle_timer_store);
+DEVICE_ATTR(gpubusy, 0444, kgsl_pwrctrl_gpubusy_show,
+ NULL);
+DEVICE_ATTR(gputop, 0444, kgsl_pwrctrl_gputop_show,
+ NULL);
+DEVICE_ATTR(gpu_available_frequencies, 0444,
+ kgsl_pwrctrl_gpu_available_frequencies_show,
+ NULL);
+DEVICE_ATTR(max_pwrlevel, 0644,
+ kgsl_pwrctrl_max_pwrlevel_show,
+ kgsl_pwrctrl_max_pwrlevel_store);
+DEVICE_ATTR(min_pwrlevel, 0644,
+ kgsl_pwrctrl_min_pwrlevel_show,
+ kgsl_pwrctrl_min_pwrlevel_store);
+DEVICE_ATTR(thermal_pwrlevel, 0644,
+ kgsl_pwrctrl_thermal_pwrlevel_show,
+ kgsl_pwrctrl_thermal_pwrlevel_store);
+DEVICE_ATTR(num_pwrlevels, 0444,
+ kgsl_pwrctrl_num_pwrlevels_show,
+ NULL);
static const struct device_attribute *pwrctrl_attr_list[] = {
&dev_attr_gpuclk,
@@ -465,11 +647,12 @@
&dev_attr_pwrnap,
&dev_attr_idle_timer,
&dev_attr_gpubusy,
+ &dev_attr_gputop,
&dev_attr_gpu_available_frequencies,
- &dev_attr_gpubusy_time,
- &dev_attr_gpubusy_time_in_state,
- &dev_attr_gputime_in_state,
- &dev_attr_init_pwrlevel,
+ &dev_attr_max_pwrlevel,
+ &dev_attr_min_pwrlevel,
+ &dev_attr_thermal_pwrlevel,
+ &dev_attr_num_pwrlevels,
NULL
};
@@ -483,27 +666,37 @@
kgsl_remove_device_sysfs_files(device->dev, pwrctrl_attr_list);
}
+static void update_statistics(struct kgsl_device *device)
+{
+ struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+ unsigned int on_time = 0;
+ int i;
+ int num_pwrlevels = device->pwrctrl.num_pwrlevels - 1;
+	/* Per clk time */
+ for (i = 0; i < num_pwrlevels; i++) {
+ clkstats->old_clock_time[i] = clkstats->clock_time[i];
+ on_time += clkstats->clock_time[i];
+ clkstats->clock_time[i] = 0;
+ }
+ clkstats->old_clock_time[num_pwrlevels] =
+ clkstats->clock_time[num_pwrlevels];
+ clkstats->clock_time[num_pwrlevels] = 0;
+ clkstats->on_time_old = on_time;
+ clkstats->elapsed_old = clkstats->elapsed;
+ clkstats->elapsed = 0;
+}
+
+/* Track the amount of time the gpu is on vs the total system time. *
+ * Regularly update the percentage of busy time displayed by sysfs. */
static void kgsl_pwrctrl_busy_time(struct kgsl_device *device, bool on_time)
{
- struct kgsl_busy *b = &device->pwrctrl.busy;
- int elapsed;
- if (b->start.tv_sec == 0)
- do_gettimeofday(&(b->start));
- do_gettimeofday(&(b->stop));
- elapsed = (b->stop.tv_sec - b->start.tv_sec) * 1000000;
- elapsed += b->stop.tv_usec - b->start.tv_usec;
- b->time += elapsed;
- if (on_time)
- b->on_time += elapsed;
-
- if ((b->time > UPDATE_BUSY_VAL) ||
+ struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+ update_clk_statistics(device, on_time);
+ /* Update the output regularly and reset the counters. */
+ if ((clkstats->elapsed > UPDATE_BUSY_VAL) ||
!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
- b->on_time_old = b->on_time;
- b->time_old = b->time;
- b->on_time = 0;
- b->time = 0;
+ update_statistics(device);
}
- do_gettimeofday(&(b->start));
}
void kgsl_pwrctrl_clk(struct kgsl_device *device, int state,
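With the hunk above, kgsl_pwrctrl_busy_time() only accumulates deltas and, once the window passes UPDATE_BUSY_VAL (or the AXI clock drops), publishes them into the *_old fields that the gpubusy/gputop nodes print. A toy userspace sketch of that accumulate-and-rollover pattern follows; the window length and millisecond time base are illustrative assumptions.

/* Toy accumulate-and-rollover model of the clk_stats bookkeeping.
 * Units and the window size are assumptions for illustration only. */
#include <stdio.h>

#define UPDATE_WINDOW_MS 100	/* assumed reporting window */

struct clk_stats {
	unsigned int on_time, elapsed;		/* current window */
	unsigned int on_time_old, elapsed_old;	/* last published window */
};

static void busy_time(struct clk_stats *s, unsigned int delta_ms, int gpu_on)
{
	s->elapsed += delta_ms;
	if (gpu_on)
		s->on_time += delta_ms;

	/* Publish and reset once the window is full. */
	if (s->elapsed > UPDATE_WINDOW_MS) {
		s->on_time_old = s->on_time;
		s->elapsed_old = s->elapsed;
		s->on_time = 0;
		s->elapsed = 0;
	}
}

int main(void)
{
	struct clk_stats s = { 0, 0, 0, 0 };

	busy_time(&s, 60, 1);	/* 60 ms with clocks on */
	busy_time(&s, 60, 0);	/* 60 ms idle: window closes */
	printf("%u ms busy out of %u ms\n", s.on_time_old, s.elapsed_old);
	return 0;
}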
@@ -515,20 +708,10 @@
if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON,
&pwr->power_flags)) {
trace_kgsl_clk(device, state);
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE
- if(device->id == 0) {
- trace_kgsl_usage(device, state, task_tgid_nr(current), device->gputime.total, device->gputime.busy,
- pwr->active_pwrlevel, pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq);
- device->prev_pid= -1;
- }
-#endif
for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
- if (pwr->grp_clks[i]) {
+ if (pwr->grp_clks[i])
clk_disable(pwr->grp_clks[i]);
- if (i == 0)
- gpufreq_stats_update(0, pwr->active_pwrlevel, (pwr->num_pwrlevels - 1));
- }
-
+ /* High latency clock maintenance. */
if ((pwr->pwrlevels[0].gpu_freq > 0) &&
(requested_state != KGSL_STATE_NAP)) {
clk_set_rate(pwr->grp_clks[0],
@@ -539,20 +722,21 @@
clk_unprepare(pwr->grp_clks[i]);
}
kgsl_pwrctrl_busy_time(device, true);
+ } else if (requested_state == KGSL_STATE_SLEEP) {
+ /* High latency clock maintenance. */
+ if ((pwr->pwrlevels[0].gpu_freq > 0))
+ clk_set_rate(pwr->grp_clks[0],
+ pwr->pwrlevels[pwr->num_pwrlevels - 1].
+ gpu_freq);
+ for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+ if (pwr->grp_clks[i])
+ clk_unprepare(pwr->grp_clks[i]);
}
} else if (state == KGSL_PWRFLAGS_ON) {
if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON,
&pwr->power_flags)) {
-
trace_kgsl_clk(device, state);
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE
- if(device->id == 0) {
- trace_kgsl_usage(device, state, task_tgid_nr(current), device->gputime.total, device->gputime.busy,
- pwr->active_pwrlevel, pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq);
- device->prev_pid = task_tgid_nr(current);
- }
-#endif
-
+ /* High latency clock maintenance. */
if (device->state != KGSL_STATE_NAP) {
for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
if (pwr->grp_clks[i])
@@ -564,12 +748,11 @@
[pwr->active_pwrlevel].
gpu_freq);
}
+	/* As the last step, enable grp_clk
+	   so that GPU interrupts can come through */
for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
- if (pwr->grp_clks[i]) {
+ if (pwr->grp_clks[i])
clk_enable(pwr->grp_clks[i]);
- if (i == 0)
- gpufreq_stats_update(1, KGSL_MAX_PWRLEVELS, pwr->active_pwrlevel);
- }
kgsl_pwrctrl_busy_time(device, false);
}
}
@@ -617,8 +800,8 @@
if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON,
&pwr->power_flags)) {
trace_kgsl_rail(device, state);
- if (pwr->gpu_dig)
- regulator_disable(pwr->gpu_dig);
+ if (pwr->gpu_cx)
+ regulator_disable(pwr->gpu_cx);
if (pwr->gpu_reg)
regulator_disable(pwr->gpu_reg);
}
@@ -634,8 +817,8 @@
"failed: %d\n",
status);
}
- if (pwr->gpu_dig) {
- int status = regulator_enable(pwr->gpu_dig);
+ if (pwr->gpu_cx) {
+ int status = regulator_enable(pwr->gpu_cx);
if (status)
KGSL_DRV_ERR(device,
"cx regulator_enable "
@@ -678,9 +861,7 @@
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
- spin_lock_init(&gpufreq_stats_lock);
-
-
+	/* Acquire clocks */
for (i = 0; i < KGSL_MAX_CLKS; i++) {
if (pdata->clk_map & clks[i].map) {
clk = clk_get(&pdev->dev, clks[i].name);
@@ -689,11 +870,11 @@
pwr->grp_clks[i] = clk;
}
}
-
+ /* Make sure we have a source clk for freq setting */
if (pwr->grp_clks[0] == NULL)
pwr->grp_clks[0] = pwr->grp_clks[1];
-
+ /* put the AXI bus into asynchronous mode with the graphics cores */
if (pdata->set_grp_async != NULL)
pdata->set_grp_async();
@@ -704,6 +885,13 @@
goto done;
}
pwr->num_pwrlevels = pdata->num_levels;
+
+ /* Initialize the user and thermal clock constraints */
+
+ pwr->max_pwrlevel = 0;
+ pwr->min_pwrlevel = pdata->num_levels - 2;
+ pwr->thermal_pwrlevel = 0;
+
pwr->active_pwrlevel = pdata->init_level;
pwr->default_pwrlevel = pdata->init_level;
for (i = 0; i < pdata->num_levels; i++) {
@@ -717,7 +905,7 @@
pwr->pwrlevels[i].io_fraction =
pdata->pwrlevel[i].io_fraction;
}
-
+ /* Do not set_rate for targets in sync with AXI */
if (pwr->pwrlevels[0].gpu_freq > 0)
clk_set_rate(pwr->grp_clks[0], pwr->
pwrlevels[pwr->num_pwrlevels - 1].gpu_freq);
@@ -727,11 +915,11 @@
pwr->gpu_reg = NULL;
if (pwr->gpu_reg) {
- pwr->gpu_dig = regulator_get(&pdev->dev, "vdd_dig");
- if (IS_ERR(pwr->gpu_dig))
- pwr->gpu_dig = NULL;
+ pwr->gpu_cx = regulator_get(&pdev->dev, "vddcx");
+ if (IS_ERR(pwr->gpu_cx))
+ pwr->gpu_cx = NULL;
} else
- pwr->gpu_dig = NULL;
+ pwr->gpu_cx = NULL;
pwr->power_flags = 0;
@@ -762,9 +950,6 @@
pm_runtime_enable(device->parentdev);
register_early_suspend(&device->display_off);
-
- gpufreq_stats_update(1, pwr->active_pwrlevel, KGSL_MAX_PWRLEVELS);
-
return result;
clk_err:
@@ -798,9 +983,9 @@
pwr->gpu_reg = NULL;
}
- if (pwr->gpu_dig) {
- regulator_put(pwr->gpu_dig);
- pwr->gpu_dig = NULL;
+ if (pwr->gpu_cx) {
+ regulator_put(pwr->gpu_cx);
+ pwr->gpu_cx = NULL;
}
for (i = 1; i < KGSL_MAX_CLKS; i++)
@@ -829,14 +1014,17 @@
mod_timer(&device->idle_timer,
jiffies +
device->pwrctrl.interval_timeout);
- device->pwrctrl.busy.no_nap_cnt++;
- if (device->pwrctrl.busy.no_nap_cnt > UPDATE_BUSY) {
+ /* If the GPU has been too busy to sleep, make sure *
+			 * that is accurately reflected in the % busy numbers. */
+ device->pwrctrl.clk_stats.no_nap_cnt++;
+ if (device->pwrctrl.clk_stats.no_nap_cnt >
+ UPDATE_BUSY) {
kgsl_pwrctrl_busy_time(device, true);
- device->pwrctrl.busy.no_nap_cnt = 0;
+ device->pwrctrl.clk_stats.no_nap_cnt = 0;
}
}
} else if (device->state & (KGSL_STATE_HUNG |
- KGSL_STATE_DUMP_AND_RECOVER)) {
+ KGSL_STATE_DUMP_AND_FT)) {
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
}
@@ -854,7 +1042,7 @@
kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
else
kgsl_pwrctrl_request_state(device, KGSL_STATE_SLEEP);
-
+ /* Have work run in a non-interrupt context. */
queue_work(device->work_queue, &device->idle_check_ws);
}
}
@@ -875,7 +1063,7 @@
break;
case KGSL_STATE_INIT:
case KGSL_STATE_HUNG:
- case KGSL_STATE_DUMP_AND_RECOVER:
+ case KGSL_STATE_DUMP_AND_FT:
if (test_bit(KGSL_PWRFLAGS_CLK_ON,
&device->pwrctrl.power_flags))
break;
@@ -899,9 +1087,9 @@
mutex_unlock(&device->mutex);
wait_for_completion(&device->hwaccess_gate);
mutex_lock(&device->mutex);
- } else if (device->state == KGSL_STATE_DUMP_AND_RECOVER) {
+ } else if (device->state == KGSL_STATE_DUMP_AND_FT) {
mutex_unlock(&device->mutex);
- wait_for_completion(&device->recovery_gate);
+ wait_for_completion(&device->ft_gate);
mutex_lock(&device->mutex);
} else if (device->state == KGSL_STATE_SLUMBER)
kgsl_pwrctrl_wake(device);
@@ -934,7 +1122,7 @@
_sleep_accounting(struct kgsl_device *device)
{
kgsl_pwrctrl_busy_time(device, false);
- device->pwrctrl.busy.start.tv_sec = 0;
+ device->pwrctrl.clk_stats.start = ktime_set(0, 0);
device->pwrctrl.time = 0;
kgsl_pwrscale_sleep(device);
}
@@ -942,21 +1130,16 @@
static int
_sleep(struct kgsl_device *device)
{
- struct kgsl_pwrctrl *pwr = &device->pwrctrl;
switch (device->state) {
case KGSL_STATE_ACTIVE:
if (!device->ftbl->isidle(device)) {
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
return -EBUSY;
}
-
+ /* fall through */
case KGSL_STATE_NAP:
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF);
- if (pwr->pwrlevels[0].gpu_freq > 0)
- clk_set_rate(pwr->grp_clks[0],
- pwr->pwrlevels[pwr->num_pwrlevels - 1].
- gpu_freq);
_sleep_accounting(device);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP);
@@ -971,6 +1154,9 @@
kgsl_pwrstate_to_str(device->state));
break;
}
+
+ kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
+
return 0;
}
@@ -983,7 +1169,7 @@
kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
return -EBUSY;
}
-
+ /* fall through */
case KGSL_STATE_NAP:
case KGSL_STATE_SLEEP:
del_timer_sync(&device->idle_timer);
@@ -1004,12 +1190,14 @@
return 0;
}
+/******************************************************************/
+/* Caller must hold the device mutex. */
int kgsl_pwrctrl_sleep(struct kgsl_device *device)
{
int status = 0;
KGSL_PWR_INFO(device, "sleep device %d\n", device->id);
-
+ /* Work through the legal state transitions */
switch (device->requested_state) {
case KGSL_STATE_NAP:
status = _nap(device);
@@ -1031,6 +1219,8 @@
}
EXPORT_SYMBOL(kgsl_pwrctrl_sleep);
+/******************************************************************/
+/* Caller must hold the device mutex. */
void kgsl_pwrctrl_wake(struct kgsl_device *device)
{
int status;
@@ -1043,23 +1233,24 @@
KGSL_DRV_ERR(device, "start failed %d\n", status);
break;
}
-
+ /* fall through */
case KGSL_STATE_SLEEP:
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
kgsl_pwrscale_wake(device);
-
+ /* fall through */
case KGSL_STATE_NAP:
-
+ /* Turn on the core clocks */
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE);
-
+ /* Enable state before turning on irq */
kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
-
+ /* Re-enable HW access */
mod_timer(&device->idle_timer,
jiffies + device->pwrctrl.interval_timeout);
pm_qos_update_request(&device->pm_qos_req_dma,
- GPU_SWFI_LATENCY);
+ GPU_SWFI_LATENCY);
case KGSL_STATE_ACTIVE:
+ kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
break;
default:
KGSL_PWR_WARN(device, "unhandled state %s\n",
@@ -1072,7 +1263,7 @@
void kgsl_pwrctrl_enable(struct kgsl_device *device)
{
-
+ /* Order pwrrail/clk sequence based upon platform */
kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE);
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
@@ -1081,7 +1272,7 @@
void kgsl_pwrctrl_disable(struct kgsl_device *device)
{
-
+ /* Order pwrrail/clk sequence based upon platform */
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_OFF);
@@ -1121,7 +1312,7 @@
return "SUSPEND";
case KGSL_STATE_HUNG:
return "HUNG";
- case KGSL_STATE_DUMP_AND_RECOVER:
+ case KGSL_STATE_DUMP_AND_FT:
return "DNR";
case KGSL_STATE_SLUMBER:
return "SLUMBER";
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
index 591582f..8d66505 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -13,6 +13,9 @@
#ifndef __KGSL_PWRCTRL_H
#define __KGSL_PWRCTRL_H
+/*****************************************************************************
+** power flags
+*****************************************************************************/
#define KGSL_PWRFLAGS_ON 1
#define KGSL_PWRFLAGS_OFF 0
@@ -24,16 +27,41 @@
struct platform_device;
-struct kgsl_busy {
- struct timeval start;
- struct timeval stop;
- int on_time;
- int time;
- int on_time_old;
- int time_old;
+struct kgsl_clk_stats {
+ unsigned int old_clock_time[KGSL_MAX_PWRLEVELS];
+ unsigned int clock_time[KGSL_MAX_PWRLEVELS];
+ unsigned int on_time_old;
+ ktime_t start;
+ ktime_t stop;
unsigned int no_nap_cnt;
+ unsigned int elapsed;
+ unsigned int elapsed_old;
};
+/**
+ * struct kgsl_pwrctrl - Power control settings for a KGSL device
+ * @interrupt_num - The interrupt number for the device
+ * @ebi1_clk - Pointer to the EBI clock structure
+ * @grp_clks - Array of clocks structures that we control
+ * @power_flags - Control flags for power
+ * @pwrlevels - List of supported power levels
+ * @active_pwrlevel - The currently active power level
+ * @thermal_pwrlevel - maximum powerlevel constraint from thermal
+ * @max_pwrlevel - maximum allowable powerlevel per the user
+ * @min_pwrlevel - minimum allowable powerlevel per the user
+ * @num_pwrlevels - number of available power levels
+ * @interval_timeout - timeout in jiffies to be idle before a power event
+ * @strtstp_sleepwake - true if the device supports low latency GPU start/stop
+ * @gpu_reg - pointer to the regulator structure for gpu_reg
+ * @gpu_cx - pointer to the regulator structure for gpu_cx
+ * @pcl - bus scale identifier
+ * @nap_allowed - true if the device supports naps
+ * @idle_needed - true if the device needs an idle before clock change
+ * @irq_name - resource name for the IRQ
+ * @restore_slumber - Flag to indicate that we are in a suspend/restore sequence
+ * @clk_stats - structure of clock statistics
+ */
+
struct kgsl_pwrctrl {
int interrupt_num;
struct clk *ebi1_clk;
@@ -43,18 +71,20 @@
unsigned int active_pwrlevel;
int thermal_pwrlevel;
unsigned int default_pwrlevel;
+ unsigned int max_pwrlevel;
+ unsigned int min_pwrlevel;
unsigned int num_pwrlevels;
unsigned int interval_timeout;
bool strtstp_sleepwake;
struct regulator *gpu_reg;
- struct regulator *gpu_dig;
+ struct regulator *gpu_cx;
uint32_t pcl;
unsigned int nap_allowed;
unsigned int idle_needed;
const char *irq_name;
s64 time;
- struct kgsl_busy busy;
unsigned int restore_slumber;
+ struct kgsl_clk_stats clk_stats;
};
void kgsl_pwrctrl_irq(struct kgsl_device *device, int state);
@@ -79,4 +109,4 @@
void kgsl_pwrctrl_set_state(struct kgsl_device *device, unsigned int state);
void kgsl_pwrctrl_request_state(struct kgsl_device *device, unsigned int state);
-#endif
+#endif /* __KGSL_PWRCTRL_H */
diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c
index 12e1885..dffae70 100644
--- a/drivers/gpu/msm/kgsl_pwrscale.c
+++ b/drivers/gpu/msm/kgsl_pwrscale.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -39,6 +39,7 @@
struct kgsl_pwrscale_attribute pwrscale_attr_##_name = \
__ATTR(_name, _mode, _show, _store)
+/* Master list of available policies */
static struct kgsl_pwrscale_policy *kgsl_pwrscale_policies[] = {
#ifdef CONFIG_MSM_SCM
@@ -59,6 +60,8 @@
int i;
struct kgsl_pwrscale_policy *policy = NULL;
+ /* The special keyword none allows the user to detach all
+ policies */
if (!strncmp("none", buf, 4)) {
kgsl_pwrscale_detach_policy(device);
return count;
@@ -234,11 +237,9 @@
void kgsl_pwrscale_busy(struct kgsl_device *device)
{
if (PWRSCALE_ACTIVE(device) && device->pwrscale.policy->busy)
- if ((!device->pwrscale.gpu_busy) &&
- (device->requested_state != KGSL_STATE_SLUMBER))
+ if (device->requested_state != KGSL_STATE_SLUMBER)
device->pwrscale.policy->busy(device,
&device->pwrscale);
- device->pwrscale.gpu_busy = 1;
}
void kgsl_pwrscale_idle(struct kgsl_device *device)
@@ -248,7 +249,6 @@
device->requested_state != KGSL_STATE_SLEEP)
device->pwrscale.policy->idle(device,
&device->pwrscale);
- device->pwrscale.gpu_busy = 0;
}
EXPORT_SYMBOL(kgsl_pwrscale_idle);
@@ -299,8 +299,14 @@
{
if (device->pwrscale.policy != NULL) {
device->pwrscale.policy->close(device, &device->pwrscale);
+
+ /*
+	 * Try to set the max pwrlevel; kgsl_pwrctrl_pwrlevel_change
+	 * will clamp it to the thermal limit if that is lower
+ */
+
kgsl_pwrctrl_pwrlevel_change(device,
- device->pwrctrl.thermal_pwrlevel);
+ device->pwrctrl.max_pwrlevel);
}
device->pwrscale.policy = NULL;
}
@@ -333,7 +339,7 @@
device->pwrscale.policy = policy;
-
+ /* Pwrscale is enabled by default at attach time */
kgsl_pwrscale_enable(device);
if (policy) {
diff --git a/drivers/gpu/msm/kgsl_pwrscale.h b/drivers/gpu/msm/kgsl_pwrscale.h
index 34698cd..f17b394 100644
--- a/drivers/gpu/msm/kgsl_pwrscale.h
+++ b/drivers/gpu/msm/kgsl_pwrscale.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -36,7 +36,6 @@
struct kgsl_pwrscale_policy *policy;
struct kobject kobj;
void *priv;
- int gpu_busy;
int enabled;
};
diff --git a/drivers/gpu/msm/kgsl_pwrscale_idlestats.c b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c
new file mode 100644
index 0000000..c3188a5
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pwrscale_idlestats.c
@@ -0,0 +1,232 @@
+/* Copyright (c) 2011-2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/timer.h>
+#include <linux/idle_stats_device.h>
+#include <linux/cpufreq.h>
+#include <linux/notifier.h>
+#include <linux/cpumask.h>
+#include <linux/tick.h>
+
+#include "kgsl.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_device.h"
+
+#define MAX_CORES 4
+struct _cpu_info {
+ spinlock_t lock;
+ struct notifier_block cpu_nb;
+ u64 start[MAX_CORES];
+ u64 end[MAX_CORES];
+ int curr_freq[MAX_CORES];
+ int max_freq[MAX_CORES];
+};
+
+struct idlestats_priv {
+ char name[32];
+ struct msm_idle_stats_device idledev;
+ struct kgsl_device *device;
+ struct msm_idle_pulse pulse;
+ struct _cpu_info cpu_info;
+};
+
+static int idlestats_cpufreq_notifier(
+ struct notifier_block *nb,
+ unsigned long val, void *data)
+{
+ struct _cpu_info *cpu = container_of(nb,
+ struct _cpu_info, cpu_nb);
+ struct cpufreq_freqs *freq = data;
+
+ if (val != CPUFREQ_POSTCHANGE)
+ return 0;
+
+ spin_lock(&cpu->lock);
+ if (freq->cpu < num_possible_cpus())
+ cpu->curr_freq[freq->cpu] = freq->new / 1000;
+ spin_unlock(&cpu->lock);
+
+ return 0;
+}
+
+static void idlestats_get_sample(struct msm_idle_stats_device *idledev,
+ struct msm_idle_pulse *pulse)
+{
+ struct kgsl_power_stats stats;
+ struct idlestats_priv *priv = container_of(idledev,
+ struct idlestats_priv, idledev);
+ struct kgsl_device *device = priv->device;
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+ mutex_lock(&device->mutex);
+ /* If the GPU is asleep, don't wake it up - assume that we
+ are idle */
+
+ if (device->state == KGSL_STATE_ACTIVE) {
+ device->ftbl->power_stats(device, &stats);
+ pulse->busy_start_time = pwr->time - stats.busy_time;
+ pulse->busy_interval = stats.busy_time;
+ } else {
+ pulse->busy_start_time = pwr->time;
+ pulse->busy_interval = 0;
+ }
+ pulse->wait_interval = 0;
+ mutex_unlock(&device->mutex);
+}
+
+static void idlestats_busy(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ struct idlestats_priv *priv = pwrscale->priv;
+ struct kgsl_power_stats stats;
+ int i, busy, nr_cpu = 1;
+
+ if (priv->pulse.busy_start_time != 0) {
+ priv->pulse.wait_interval = 0;
+ /* Calculate the total CPU busy time for this GPU pulse */
+ for (i = 0; i < num_possible_cpus(); i++) {
+ spin_lock(&priv->cpu_info.lock);
+ if (cpu_online(i)) {
+ priv->cpu_info.end[i] =
+ (u64)ktime_to_us(ktime_get()) -
+ get_cpu_idle_time_us(i, NULL);
+ busy = priv->cpu_info.end[i] -
+ priv->cpu_info.start[i];
+ /* Normalize the busy time by frequency */
+ busy = priv->cpu_info.curr_freq[i] *
+ (busy / priv->cpu_info.max_freq[i]);
+ priv->pulse.wait_interval += busy;
+ nr_cpu++;
+ }
+ spin_unlock(&priv->cpu_info.lock);
+ }
+ priv->pulse.wait_interval /= nr_cpu;
+
+ /* This is called from within a mutex protected function, so
+ no additional locking required */
+ device->ftbl->power_stats(device, &stats);
+
+ /* If total_time is zero, then we don't have
+ any interesting statistics to store */
+ if (stats.total_time == 0) {
+ priv->pulse.busy_start_time = 0;
+ return;
+ }
+
+ priv->pulse.busy_interval = stats.busy_time;
+ msm_idle_stats_idle_end(&priv->idledev, &priv->pulse);
+ }
+ priv->pulse.busy_start_time = ktime_to_us(ktime_get());
+}
+
+static void idlestats_idle(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ int i, nr_cpu;
+ struct idlestats_priv *priv = pwrscale->priv;
+
+ nr_cpu = num_possible_cpus();
+ for (i = 0; i < nr_cpu; i++)
+ if (cpu_online(i))
+ priv->cpu_info.start[i] =
+ (u64)ktime_to_us(ktime_get()) -
+ get_cpu_idle_time_us(i, NULL);
+
+ msm_idle_stats_idle_start(&priv->idledev);
+}
+
+static void idlestats_sleep(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ struct idlestats_priv *priv = pwrscale->priv;
+ msm_idle_stats_update_event(&priv->idledev,
+ MSM_IDLE_STATS_EVENT_IDLE_TIMER_EXPIRED);
+}
+
+static void idlestats_wake(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ /* Use highest perf level on wake-up from
+ sleep for better performance */
+ kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO);
+}
+
+static int idlestats_init(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ struct idlestats_priv *priv;
+ struct cpufreq_policy cpu_policy;
+ int ret, i;
+
+ priv = pwrscale->priv = kzalloc(sizeof(struct idlestats_priv),
+ GFP_KERNEL);
+ if (pwrscale->priv == NULL)
+ return -ENOMEM;
+
+ snprintf(priv->name, sizeof(priv->name), "idle_stats_%s",
+ device->name);
+
+ priv->device = device;
+
+ priv->idledev.name = (const char *) priv->name;
+ priv->idledev.get_sample = idlestats_get_sample;
+
+ spin_lock_init(&priv->cpu_info.lock);
+ priv->cpu_info.cpu_nb.notifier_call =
+ idlestats_cpufreq_notifier;
+ ret = cpufreq_register_notifier(&priv->cpu_info.cpu_nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ if (ret)
+ goto err;
+ for (i = 0; i < num_possible_cpus(); i++) {
+ cpufreq_frequency_table_cpuinfo(&cpu_policy,
+ cpufreq_frequency_get_table(i));
+ priv->cpu_info.max_freq[i] = cpu_policy.max / 1000;
+ priv->cpu_info.curr_freq[i] = cpu_policy.max / 1000;
+ }
+ ret = msm_idle_stats_register_device(&priv->idledev);
+err:
+ if (ret) {
+ kfree(pwrscale->priv);
+ pwrscale->priv = NULL;
+ }
+
+ return ret;
+}
+
+static void idlestats_close(struct kgsl_device *device,
+ struct kgsl_pwrscale *pwrscale)
+{
+ struct idlestats_priv *priv = pwrscale->priv;
+
+ if (pwrscale->priv == NULL)
+ return;
+
+ cpufreq_unregister_notifier(&priv->cpu_info.cpu_nb,
+ CPUFREQ_TRANSITION_NOTIFIER);
+ msm_idle_stats_deregister_device(&priv->idledev);
+
+ kfree(pwrscale->priv);
+ pwrscale->priv = NULL;
+}
+
+struct kgsl_pwrscale_policy kgsl_pwrscale_policy_idlestats = {
+ .name = "idlestats",
+ .init = idlestats_init,
+ .idle = idlestats_idle,
+ .busy = idlestats_busy,
+ .sleep = idlestats_sleep,
+ .wake = idlestats_wake,
+ .close = idlestats_close
+};
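The busy callback in this new policy averages per-CPU busy time across online cores, scaling each core's interval by its current versus maximum frequency. A rough userspace model of that normalization is below; the core data is invented and, as in the driver, the result is a scaled figure rather than exact microseconds.

/* Sketch of the frequency-normalized CPU busy averaging done in
 * idlestats_busy(); core counts and frequencies here are made up. */
#include <stdio.h>

struct core {
	int online;
	unsigned long long busy_us;	/* wall-clock busy time this pulse */
	int curr_mhz, max_mhz;
};

static unsigned long long normalized_cpu_busy(const struct core *c, int n)
{
	unsigned long long total = 0;
	int i, nr = 1;	/* the driver also starts its divisor at 1 */

	for (i = 0; i < n; i++) {
		if (!c[i].online)
			continue;
		/* Scale the interval by how fast the core was running. */
		total += c[i].curr_mhz * (c[i].busy_us / c[i].max_mhz);
		nr++;
	}
	return total / nr;
}

int main(void)
{
	struct core cores[2] = {
		{ 1, 4000, 918, 1512 },	/* online, 4 ms busy, 918/1512 MHz */
		{ 1, 2000, 1512, 1512 },
	};

	printf("avg busy: %llu\n", normalized_cpu_busy(cores, 2));
	return 0;
}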
diff --git a/drivers/gpu/msm/kgsl_pwrscale_msm.c b/drivers/gpu/msm/kgsl_pwrscale_msm.c
index f3948c3..073e474 100644
--- a/drivers/gpu/msm/kgsl_pwrscale_msm.c
+++ b/drivers/gpu/msm/kgsl_pwrscale_msm.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -17,22 +17,26 @@
#include "kgsl_pwrscale.h"
#include "kgsl_device.h"
#include "a2xx_reg.h"
+#include "kgsl_trace.h"
struct msm_priv {
- struct kgsl_device *device;
- int enabled;
- int handle;
- unsigned int cur_freq;
- struct msm_dcvs_idle idle_source;
- struct msm_dcvs_freq freq_sink;
- struct msm_dcvs_core_info *core_info;
+ struct kgsl_device *device;
+ int enabled;
+ unsigned int cur_freq;
+ unsigned int req_level;
+ int floor_level;
+ struct msm_dcvs_core_info *core_info;
+ int gpu_busy;
+ int dcvs_core_id;
};
-static int msm_idle_enable(struct msm_dcvs_idle *self,
- enum msm_core_control_event event)
+/* reference to be used in idle and freq callbacks */
+static struct msm_priv *the_msm_priv;
+
+static int msm_idle_enable(int type_core_num,
+ enum msm_core_control_event event)
{
- struct msm_priv *priv = container_of(self, struct msm_priv,
- idle_source);
+ struct msm_priv *priv = the_msm_priv;
switch (event) {
case MSM_DCVS_ENABLE_IDLE_PULSE:
@@ -48,16 +52,17 @@
return 0;
}
-static int msm_set_freq(struct msm_dcvs_freq *self,
- unsigned int freq)
+/* Set the requested frequency if it is within 5MHz (delta) of a
+ * supported frequency.
+ */
+static int msm_set_freq(int core_num, unsigned int freq)
{
int i, delta = 5000000;
- struct msm_priv *priv = container_of(self, struct msm_priv,
- freq_sink);
+ struct msm_priv *priv = the_msm_priv;
struct kgsl_device *device = priv->device;
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
-
+ /* msm_dcvs manager uses frequencies in kHz */
freq *= 1000;
for (i = 0; i < pwr->num_pwrlevels; i++)
if (abs(pwr->pwrlevels[i].gpu_freq - freq) < delta)
@@ -66,19 +71,51 @@
return 0;
mutex_lock(&device->mutex);
- kgsl_pwrctrl_pwrlevel_change(device, i);
- priv->cur_freq = pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq;
+ priv->req_level = i;
+ if (priv->req_level <= priv->floor_level) {
+ kgsl_pwrctrl_pwrlevel_change(device, priv->req_level);
+ priv->cur_freq = pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq;
+ }
mutex_unlock(&device->mutex);
-
+ /* return current frequency in kHz */
return priv->cur_freq / 1000;
}
-static unsigned int msm_get_freq(struct msm_dcvs_freq *self)
+static int msm_set_min_freq(int core_num, unsigned int freq)
{
- struct msm_priv *priv = container_of(self, struct msm_priv,
- freq_sink);
-
+ int i, delta = 5000000;
+ struct msm_priv *priv = the_msm_priv;
+ struct kgsl_device *device = priv->device;
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+ /* msm_dcvs manager uses frequencies in kHz */
+ freq *= 1000;
+ for (i = 0; i < pwr->num_pwrlevels; i++)
+ if (abs(pwr->pwrlevels[i].gpu_freq - freq) < delta)
+ break;
+ if (i == pwr->num_pwrlevels)
+ return 0;
+
+ mutex_lock(&device->mutex);
+ priv->floor_level = i;
+ if (priv->floor_level <= priv->req_level)
+ kgsl_pwrctrl_pwrlevel_change(device, priv->floor_level);
+ else if (priv->floor_level > priv->req_level)
+ kgsl_pwrctrl_pwrlevel_change(device, priv->req_level);
+
+ priv->cur_freq = pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq;
+ mutex_unlock(&device->mutex);
+
+ /* return current frequency in kHz */
+ return priv->cur_freq / 1000;
+}
+
+static unsigned int msm_get_freq(int core_num)
+{
+ struct msm_priv *priv = the_msm_priv;
+
+ /* return current frequency in kHz */
return priv->cur_freq / 1000;
}
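Taken together, msm_set_freq() and msm_set_min_freq() keep two indices, the manager's requested level and a floor (minimum frequency) level, and in steady state the active level is whichever of the two maps to the higher frequency. A minimal sketch of that clamp, assuming level 0 is the fastest as in the pwrlevels table:

/* Sketch of the req_level/floor_level interaction; level 0 is the
 * highest frequency.  This is the steady-state result once both
 * callbacks have run, not the driver code itself. */
#include <stdio.h>

static int active_level(int req_level, int floor_level)
{
	/* The floor wins whenever the request would drop below it,
	 * i.e. the smaller index (higher frequency) is applied. */
	return req_level <= floor_level ? req_level : floor_level;
}

int main(void)
{
	/* Manager asks for level 3 but the floor is level 1: stay at 1. */
	printf("active = %d\n", active_level(3, 1));
	/* Manager asks for level 0: request is above the floor, use it. */
	printf("active = %d\n", active_level(0, 1));
	return 0;
}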
@@ -86,29 +123,64 @@
struct kgsl_pwrscale *pwrscale)
{
struct msm_priv *priv = pwrscale->priv;
- if (priv->enabled)
- msm_dcvs_idle(priv->handle, MSM_DCVS_IDLE_EXIT, 0);
+ if (priv->enabled && !priv->gpu_busy) {
+ msm_dcvs_idle(priv->dcvs_core_id, MSM_DCVS_IDLE_EXIT, 0);
+ trace_kgsl_mpdcvs(device, 1);
+ priv->gpu_busy = 1;
+ }
return;
}
static void msm_idle(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale)
+ struct kgsl_pwrscale *pwrscale)
{
struct msm_priv *priv = pwrscale->priv;
- unsigned int rb_rptr, rb_wptr;
- kgsl_regread(device, REG_CP_RB_RPTR, &rb_rptr);
- kgsl_regread(device, REG_CP_RB_WPTR, &rb_wptr);
- if (priv->enabled && (rb_rptr == rb_wptr))
- msm_dcvs_idle(priv->handle, MSM_DCVS_IDLE_ENTER, 0);
-
+ if (priv->enabled && priv->gpu_busy)
+ if (device->ftbl->isidle(device)) {
+ msm_dcvs_idle(priv->dcvs_core_id,
+ MSM_DCVS_IDLE_ENTER, 0);
+ trace_kgsl_mpdcvs(device, 0);
+ priv->gpu_busy = 0;
+ }
return;
}
static void msm_sleep(struct kgsl_device *device,
struct kgsl_pwrscale *pwrscale)
{
-
+ struct msm_priv *priv = pwrscale->priv;
+
+ if (priv->enabled && priv->gpu_busy) {
+ msm_dcvs_idle(priv->dcvs_core_id, MSM_DCVS_IDLE_ENTER, 0);
+ trace_kgsl_mpdcvs(device, 0);
+ priv->gpu_busy = 0;
+ }
+
+ return;
+}
+
+static void msm_set_io_fraction(struct kgsl_device *device,
+ unsigned int value)
+{
+ int i;
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+ for (i = 0; i < pwr->num_pwrlevels; i++)
+ pwr->pwrlevels[i].io_fraction = value;
+
+}
+
+static void msm_restore_io_fraction(struct kgsl_device *device)
+{
+ int i;
+ struct kgsl_device_platform_data *pdata =
+ kgsl_device_get_drvdata(device);
+ struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+ for (i = 0; i < pdata->num_levels; i++)
+ pwr->pwrlevels[i].io_fraction =
+ pdata->pwrlevel[i].io_fraction;
}
static int msm_init(struct kgsl_device *device,
@@ -116,59 +188,60 @@
{
struct msm_priv *priv;
struct msm_dcvs_freq_entry *tbl;
- int i, ret, low_level;
+ int i, ret = -EINVAL, low_level;
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct platform_device *pdev =
container_of(device->parentdev, struct platform_device, dev);
struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
- priv = pwrscale->priv = kzalloc(sizeof(struct msm_priv),
- GFP_KERNEL);
- if (pwrscale->priv == NULL)
- return -ENOMEM;
+ if (the_msm_priv) {
+ priv = pwrscale->priv = the_msm_priv;
+ } else {
+ priv = pwrscale->priv = kzalloc(sizeof(struct msm_priv),
+ GFP_KERNEL);
+ if (pwrscale->priv == NULL)
+ return -ENOMEM;
- priv->core_info = pdata->core_info;
- tbl = priv->core_info->freq_tbl;
-
- low_level = pwr->num_pwrlevels - KGSL_PWRLEVEL_LAST_OFFSET;
- for (i = 0; i <= low_level; i++)
- tbl[i].freq =
- pwr->pwrlevels[low_level - i].gpu_freq / 1000;
- ret = msm_dcvs_register_core(device->name, 0, priv->core_info);
- if (ret) {
- KGSL_PWR_ERR(device, "msm_dcvs_register_core failed");
- goto err;
+ priv->core_info = pdata->core_info;
+ tbl = priv->core_info->freq_tbl;
+ priv->floor_level = pwr->num_pwrlevels - 1;
+ /* Fill in frequency table from low to high, reversing order. */
+ low_level = pwr->num_pwrlevels - KGSL_PWRLEVEL_LAST_OFFSET;
+ for (i = 0; i <= low_level; i++)
+ tbl[i].freq =
+ pwr->pwrlevels[low_level - i].gpu_freq / 1000;
+ priv->dcvs_core_id =
+ msm_dcvs_register_core(MSM_DCVS_CORE_TYPE_GPU,
+ 0,
+ priv->core_info,
+ msm_set_freq, msm_get_freq, msm_idle_enable,
+ msm_set_min_freq,
+ priv->core_info->sensors[0]);
+ if (priv->dcvs_core_id < 0) {
+ KGSL_PWR_ERR(device, "msm_dcvs_register_core failed");
+ goto err;
+ }
+ the_msm_priv = priv;
}
-
priv->device = device;
- priv->idle_source.enable = msm_idle_enable;
- priv->idle_source.core_name = device->name;
- priv->handle = msm_dcvs_idle_source_register(&priv->idle_source);
- if (priv->handle < 0) {
- ret = priv->handle;
- KGSL_PWR_ERR(device, "msm_dcvs_idle_source_register failed\n");
- goto err;
- }
-
- priv->freq_sink.core_name = device->name;
- priv->freq_sink.set_frequency = msm_set_freq;
- priv->freq_sink.get_frequency = msm_get_freq;
- ret = msm_dcvs_freq_sink_register(&priv->freq_sink);
+ ret = msm_dcvs_freq_sink_start(priv->dcvs_core_id);
if (ret >= 0) {
if (device->ftbl->isidle(device)) {
- device->pwrscale.gpu_busy = 0;
- msm_dcvs_idle(priv->handle, MSM_DCVS_IDLE_ENTER, 0);
+ priv->gpu_busy = 0;
+ msm_dcvs_idle(priv->dcvs_core_id,
+ MSM_DCVS_IDLE_ENTER, 0);
} else {
- device->pwrscale.gpu_busy = 1;
+ priv->gpu_busy = 1;
}
+ msm_set_io_fraction(device, 0);
return 0;
}
KGSL_PWR_ERR(device, "msm_dcvs_freq_sink_register failed\n");
- msm_dcvs_idle_source_unregister(&priv->idle_source);
err:
- kfree(pwrscale->priv);
+ if (!the_msm_priv)
+ kfree(pwrscale->priv);
pwrscale->priv = NULL;
return ret;
@@ -181,10 +254,9 @@
if (pwrscale->priv == NULL)
return;
- msm_dcvs_idle_source_unregister(&priv->idle_source);
- msm_dcvs_freq_sink_unregister(&priv->freq_sink);
- kfree(pwrscale->priv);
+ msm_dcvs_freq_sink_stop(priv->dcvs_core_id);
pwrscale->priv = NULL;
+ msm_restore_io_fraction(device);
}
struct kgsl_pwrscale_policy kgsl_pwrscale_policy_msm = {
diff --git a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
index 5ae3fe0..aa6861e 100644
--- a/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
+++ b/drivers/gpu/msm/kgsl_pwrscale_trustzone.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2010-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2010-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -22,7 +22,6 @@
#include "kgsl.h"
#include "kgsl_pwrscale.h"
#include "kgsl_device.h"
-#include "kgsl_trace.h"
#define TZ_GOVERNOR_PERFORMANCE 0
#define TZ_GOVERNOR_ONDEMAND 1
@@ -31,41 +30,22 @@
int governor;
unsigned int no_switch_cnt;
unsigned int skip_cnt;
+ struct kgsl_power_stats bin;
};
spinlock_t tz_lock;
+/* FLOOR is 5msec to capture up to 3 re-draws
+ * per frame for 60fps content.
+ */
+#define FLOOR 5000
#define SWITCH_OFF 200
#define SWITCH_OFF_RESET_TH 40
#define SKIP_COUNTER 500
#define TZ_RESET_ID 0x3
#define TZ_UPDATE_ID 0x4
-#define TZ_CMD_ID 0x90
-
-
-#define PARAM_INDEX_WRITE_DOWNTHRESHOLD 100
-#define PARAM_INDEX_WRITE_UPTHRESHOLD 101
-#define PARAM_INDEX_WRITE_MINGAPCOUNT 102
-#define PARAM_INDEX_WRITE_NUMGAPS 103
-#define PARAM_INDEX_WRITE_INITIDLEVECTOR 104
-#define PARAM_INDEX_WRITE_DOWNTHRESHOLD_PERCENT 105
-#define PARAM_INDEX_WRITE_UPTHRESHOLD_PERCENT 106
-#define PARAM_INDEX_WRITE_DOWNTHRESHOLD_COUNT 107
-#define PARAM_INDEX_WRITE_UPTHRESHOLD_COUNT 108
-#define PARAM_INDEX_WRITE_ALGORITHM 109
-
-
-#define PARAM_INDEX_READ_DOWNTHRESHOLD 200
-#define PARAM_INDEX_READ_UPTHRESHOLD 201
-#define PARAM_INDEX_READ_MINGAPCOUNT 202
-#define PARAM_INDEX_READ_NUMGAPS 203
-#define PARAM_INDEX_READ_INITIDLEVECTOR 204
-#define PARAM_INDEX_READ_DOWNTHRESHOLD_PERCENT 205
-#define PARAM_INDEX_READ_UPTHRESHOLD_PERCENT 206
-#define PARAM_INDEX_READ_DOWNTHRESHOLD_COUNT 207
-#define PARAM_INDEX_READ_UPTHRESHOLD_COUNT 208
-#define PARAM_INDEX_READ_ALGORITHM 209
#ifdef CONFIG_MSM_SCM
+/* Trap into the TrustZone, and call funcs there. */
static int __secure_tz_entry(u32 cmd, u32 val, u32 id)
{
int ret;
@@ -80,7 +60,7 @@
{
return 0;
}
-#endif
+#endif /* CONFIG_MSM_SCM */
static ssize_t tz_governor_show(struct kgsl_device *device,
struct kgsl_pwrscale *pwrscale,
@@ -118,316 +98,16 @@
priv->governor = TZ_GOVERNOR_PERFORMANCE;
if (priv->governor == TZ_GOVERNOR_PERFORMANCE)
- kgsl_pwrctrl_pwrlevel_change(device, pwr->thermal_pwrlevel);
+ kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel);
mutex_unlock(&device->mutex);
return count;
}
-static ssize_t dcvs_downthreshold_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_DOWNTHRESHOLD);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_downthreshold_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_DOWNTHRESHOLD);
-
- return count;
-}
-
-static ssize_t dcvs_upthreshold_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_UPTHRESHOLD);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_upthreshold_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_UPTHRESHOLD);
-
- return count;
-}
-
-static ssize_t dcvs_down_count_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_MINGAPCOUNT);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_down_count_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_MINGAPCOUNT);
-
- return count;
-}
-
-static ssize_t dcvs_numgaps_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_NUMGAPS);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_numgaps_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_NUMGAPS);
-
- return count;
-}
-
-static ssize_t dcvs_init_idle_vector_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_INITIDLEVECTOR);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_init_idle_vector_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_INITIDLEVECTOR);
-
- return count;
-}
-
-static ssize_t dcvs_algorithm_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_ALGORITHM);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_algorithm_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_ALGORITHM);
-
- return count;
-}
-
-static ssize_t dcvs_upthreshold_percent_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_UPTHRESHOLD_PERCENT);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_upthreshold_percent_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_UPTHRESHOLD_PERCENT);
-
- return count;
-}
-
-static ssize_t dcvs_downthreshold_percent_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_DOWNTHRESHOLD_PERCENT);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_downthreshold_percent_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_DOWNTHRESHOLD_PERCENT);
-
- return count;
-}
-
-static ssize_t dcvs_upthreshold_count_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_UPTHRESHOLD_COUNT);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_upthreshold_count_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_UPTHRESHOLD_COUNT);
-
- return count;
-}
-
-static ssize_t dcvs_downthreshold_count_show(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- char *buf)
-{
- int val, ret;
- val = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_READ_DOWNTHRESHOLD_COUNT);
-
- ret = sprintf(buf, "%d\n", val);
-
- return ret;
-}
-
-static ssize_t dcvs_downthreshold_count_store(struct kgsl_device *device,
- struct kgsl_pwrscale *pwrscale,
- const char *buf, size_t count)
-{
- int val, ret;
-
- ret = sscanf(buf, "%d", &val);
-
- if (ret != 1)
- return -EINVAL;
-
- __secure_tz_entry(TZ_CMD_ID, val, PARAM_INDEX_WRITE_DOWNTHRESHOLD_COUNT);
-
- return count;
-}
-
PWRSCALE_POLICY_ATTR(governor, 0644, tz_governor_show, tz_governor_store);
-PWRSCALE_POLICY_ATTR(dcvs_downthreshold, 0644, dcvs_downthreshold_show, dcvs_downthreshold_store);
-PWRSCALE_POLICY_ATTR(dcvs_upthreshold, 0644, dcvs_upthreshold_show, dcvs_upthreshold_store);
-PWRSCALE_POLICY_ATTR(dcvs_down_count, 0644, dcvs_down_count_show, dcvs_down_count_store);
-PWRSCALE_POLICY_ATTR(dcvs_numgaps, 0644, dcvs_numgaps_show, dcvs_numgaps_store);
-PWRSCALE_POLICY_ATTR(dcvs_init_idle_vector, 0644, dcvs_init_idle_vector_show, dcvs_init_idle_vector_store);
-PWRSCALE_POLICY_ATTR(dcvs_algorithm, 0644, dcvs_algorithm_show, dcvs_algorithm_store);
-PWRSCALE_POLICY_ATTR(dcvs_upthreshold_percent, 0644, dcvs_upthreshold_percent_show, dcvs_upthreshold_percent_store);
-PWRSCALE_POLICY_ATTR(dcvs_downthreshold_percent, 0644, dcvs_downthreshold_percent_show, dcvs_downthreshold_percent_store);
-PWRSCALE_POLICY_ATTR(dcvs_upthreshold_count, 0644, dcvs_upthreshold_count_show, dcvs_upthreshold_count_store);
-PWRSCALE_POLICY_ATTR(dcvs_downthreshold_count, 0644, dcvs_downthreshold_count_show, dcvs_downthreshold_count_store);
static struct attribute *tz_attrs[] = {
&policy_attr_governor.attr,
- &policy_attr_dcvs_downthreshold.attr,
- &policy_attr_dcvs_upthreshold.attr,
- &policy_attr_dcvs_down_count.attr,
- &policy_attr_dcvs_numgaps.attr,
- &policy_attr_dcvs_init_idle_vector.attr,
- &policy_attr_dcvs_algorithm.attr,
- &policy_attr_dcvs_upthreshold_percent.attr,
- &policy_attr_dcvs_downthreshold_percent.attr,
- &policy_attr_dcvs_upthreshold_count.attr,
- &policy_attr_dcvs_downthreshold_count.attr,
NULL
};
@@ -439,12 +119,9 @@
{
struct tz_priv *priv = pwrscale->priv;
if (device->state != KGSL_STATE_NAP &&
- priv->governor == TZ_GOVERNOR_ONDEMAND) {
- trace_kgsl_pwrlevel(device, device->pwrctrl.default_pwrlevel,
- device->pwrctrl.pwrlevels[device->pwrctrl.default_pwrlevel].gpu_freq);
+ priv->governor == TZ_GOVERNOR_ONDEMAND)
kgsl_pwrctrl_pwrlevel_change(device,
device->pwrctrl.default_pwrlevel);
- }
}
static void tz_idle(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale)
@@ -452,16 +129,26 @@
struct kgsl_pwrctrl *pwr = &device->pwrctrl;
struct tz_priv *priv = pwrscale->priv;
struct kgsl_power_stats stats;
- int val, idle, total_time;
+ int val, idle;
-
+ /* In "performance" mode the clock speed always stays
+ the same */
if (priv->governor == TZ_GOVERNOR_PERFORMANCE)
return;
device->ftbl->power_stats(device, &stats);
- if (stats.total_time == 0)
+ priv->bin.total_time += stats.total_time;
+ priv->bin.busy_time += stats.busy_time;
+ /* Do not waste CPU cycles running this algorithm if
+ * the GPU just started, or if less than FLOOR time
+ * has passed since the last run.
+ */
+ if ((stats.total_time == 0) ||
+ (priv->bin.total_time < FLOOR))
return;
+ /* If the GPU has stayed in turbo mode for a while, *
+ * stop writing out values. */
if (pwr->active_pwrlevel == 0) {
if (priv->no_switch_cnt > SWITCH_OFF) {
priv->skip_cnt++;
@@ -476,15 +163,11 @@
priv->no_switch_cnt = 0;
}
- idle = stats.total_time - stats.busy_time;
+ idle = priv->bin.total_time - priv->bin.busy_time;
+ priv->bin.total_time = 0;
+ priv->bin.busy_time = 0;
idle = (idle > 0) ? idle : 0;
-
-
- total_time = stats.total_time & 0x0FFFFFFF;
- total_time |= (pwr->active_pwrlevel) << 28;
-
- val = __secure_tz_entry(TZ_UPDATE_ID, idle, total_time);
-
+ val = __secure_tz_entry(TZ_UPDATE_ID, idle, device->id);
if (val)
kgsl_pwrctrl_pwrlevel_change(device,
pwr->active_pwrlevel + val);
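tz_idle() now bins the power stats and only consults the algorithm once at least FLOOR microseconds have accumulated, handing over the idle portion of the bin. The sketch below shows the binning and floor gate in userspace terms; the decision function is a stand-in, since the real one runs inside TrustZone.

/* Userspace sketch of the tz_idle() binning: samples accumulate until
 * FLOOR usec has elapsed, then the idle time feeds a decision.  The
 * decision stub is an assumption; the real algorithm lives in TZ. */
#include <stdio.h>

#define FLOOR 5000	/* usec, as in the patch */

struct bin {
	unsigned int total_time;
	unsigned int busy_time;
};

/* Pretend decision: step down one level if more than half idle. */
static int decide(unsigned int idle, unsigned int total)
{
	return idle * 2 > total ? 1 : 0;
}

static void sample(struct bin *b, unsigned int total, unsigned int busy,
		   int *active_level)
{
	b->total_time += total;
	b->busy_time += busy;
	if (b->total_time < FLOOR)
		return;		/* not enough data yet */

	*active_level += decide(b->total_time - b->busy_time, b->total_time);
	b->total_time = 0;
	b->busy_time = 0;
}

int main(void)
{
	struct bin b = { 0, 0 };
	int level = 0;

	sample(&b, 3000, 1000, &level);	/* below FLOOR: just accumulate */
	sample(&b, 3000, 500, &level);	/* 6000 usec total, mostly idle */
	printf("level = %d\n", level);
	return 0;
}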
@@ -501,21 +184,16 @@
{
struct tz_priv *priv = pwrscale->priv;
- trace_kgsl_pwrlevel(device, 0, 0);
-
__secure_tz_entry(TZ_RESET_ID, 0, device->id);
priv->no_switch_cnt = 0;
+ priv->bin.total_time = 0;
+ priv->bin.busy_time = 0;
}
+#ifdef CONFIG_MSM_SCM
static int tz_init(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale)
{
struct tz_priv *priv;
- int ret;
-
-
- if (!(cpu_is_msm8x60() || cpu_is_msm8960() || cpu_is_apq8064() ||
- cpu_is_msm8930() || cpu_is_msm8930aa() || cpu_is_msm8627()))
- return -EINVAL;
priv = pwrscale->priv = kzalloc(sizeof(struct tz_priv), GFP_KERNEL);
if (pwrscale->priv == NULL)
@@ -525,15 +203,14 @@
spin_lock_init(&tz_lock);
kgsl_pwrscale_policy_add_files(device, pwrscale, &tz_attr_group);
- ret = __secure_tz_entry(TZ_CMD_ID, 0, PARAM_INDEX_WRITE_ALGORITHM);
-
- if(ret == 1)
- pr_info("Using HTC GPU DCVS algorithm\n");
- else
- pr_info("Using QCT GPU DCVS algorithm\n");
-
return 0;
}
+#else
+static int tz_init(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale)
+{
+ return -EINVAL;
+}
+#endif /* CONFIG_MSM_SCM */
static void tz_close(struct kgsl_device *device, struct kgsl_pwrscale *pwrscale)
{
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index d4a8f92..a345e58 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -24,8 +24,7 @@
#include "kgsl_cffdump.h"
#include "kgsl_device.h"
-struct ion_client* kgsl_client = NULL;
-
+/* An attribute for showing per-process memory statistics */
struct kgsl_mem_entry_attribute {
struct attribute attr;
int memtype;
@@ -43,23 +42,13 @@
.show = _show, \
}
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
-static ssize_t
-gpubusy_show(struct kgsl_process_private *priv, int type, char *buf)
-{
- char* tmp = buf;
- int i;
-
- tmp = (char*)((int)tmp + snprintf(tmp, PAGE_SIZE, "%lld %lld", priv->gputime.total, priv->gputime.busy));
- for(i=0;i<KGSL_MAX_PWRLEVELS;i++)
- tmp = (char*)( (int)tmp + snprintf(tmp, PAGE_SIZE - (int)(tmp-buf), " %lld %lld", priv->gputime_in_state[i].total, priv->gputime_in_state[i].busy));
- tmp = (char*)((int)tmp + snprintf(tmp, PAGE_SIZE, "\n"));
- return (ssize_t)(tmp - buf);
-}
-
-static struct kgsl_mem_entry_attribute gpubusy = __MEM_ENTRY_ATTR(0, gpubusy, gpubusy_show);
-#endif
-
+/*
+ * A structure to hold the attributes for a particular memory type.
+ * For each memory type in each process we store the current and maximum
+ * memory usage and display the counts in sysfs. This structure and
+ * the following macro allow us to simplify the definition for those
+ * adding new memory types
+ */
struct mem_entry_stats {
int memtype;
@@ -77,9 +66,16 @@
}
+/*
+ * One page allocation for a guard region to protect against over-zealous
+ * GPU pre-fetch
+ */
static struct page *kgsl_guard_page;
+/**
+ * Given a kobj, find the process structure attached to it
+ */
static struct kgsl_process_private *
_get_priv_from_kobj(struct kobject *kobj)
@@ -101,6 +97,9 @@
return NULL;
}
+/**
+ * Show the current amount of memory allocated for the given memtype
+ */
static ssize_t
mem_entry_show(struct kgsl_process_private *priv, int type, char *buf)
@@ -108,6 +107,10 @@
return snprintf(buf, PAGE_SIZE, "%d\n", priv->stats[type].cur);
}
+/**
+ * Show the maximum memory allocated for the given memtype through the life of
+ * the process
+ */
static ssize_t
mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf)
@@ -174,9 +177,6 @@
&mem_stats[i].max_attr.attr);
}
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- sysfs_remove_file(&private->kobj, &gpubusy.attr);
-#endif
kobject_put(&private->kobj);
}
@@ -193,15 +193,14 @@
return;
for (i = 0; i < ARRAY_SIZE(mem_stats); i++) {
+ /* We need to check the value of sysfs_create_file, but we
+ * don't really care if it passed or not */
ret = sysfs_create_file(&private->kobj,
&mem_stats[i].attr.attr);
ret = sysfs_create_file(&private->kobj,
&mem_stats[i].max_attr.attr);
}
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE
- ret = sysfs_create_file(&private->kobj, &gpubusy.attr);
-#endif
}
static int kgsl_drv_memstat_show(struct device *dev,
@@ -314,82 +313,46 @@
}
#endif
-static int kgsl_ion_alloc_vmfault(struct kgsl_memdesc *memdesc,
- struct vm_area_struct *vma,
- struct vm_fault *vmf)
-{
- unsigned long offset, pfn;
- int ret;
-
- offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >> PAGE_SHIFT;
-
- pfn = (memdesc->sg[0].dma_address >> PAGE_SHIFT) + offset;
- ret = vm_insert_pfn(vma, (unsigned long) vmf->virtual_address, pfn);
-
- if (ret == -ENOMEM || ret == -EAGAIN)
- return VM_FAULT_OOM;
- else if (ret == -EFAULT)
- return VM_FAULT_SIGBUS;
-
- return 0;
-}
-
-static int kgsl_ion_alloc_vmflags(struct kgsl_memdesc *memdesc)
-{
- return VM_RESERVED | VM_DONTEXPAND;
-}
-
-static void kgsl_ion_alloc_free(struct kgsl_memdesc *memdesc)
-{
- kgsl_driver.stats.pre_alloc -= memdesc->size;
- if (memdesc->handle)
- ion_free(kgsl_client, memdesc->handle);
-
- if (memdesc->hostptr) {
- iounmap(memdesc->hostptr);
- kgsl_driver.stats.vmalloc -= memdesc->size;
- }
-
- if (memdesc->private)
- kgsl_process_sub_stats(memdesc->private, KGSL_MEM_ENTRY_PRE_ALLOC, memdesc->size);
- else
- kgsl_driver.stats.pre_alloc_kernel -= memdesc->size;
-}
-
-static int kgsl_ion_alloc_map_kernel(struct kgsl_memdesc *memdesc)
-{
- if (!memdesc->hostptr) {
- memdesc->hostptr = ioremap(memdesc->sg[0].dma_address, memdesc->sg[0].length);
- if(IS_ERR_OR_NULL(memdesc->hostptr)) {
- KGSL_CORE_ERR("kgsl: ion ioremap failed\n");
- return -ENOMEM;
- }
- KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc,
- kgsl_driver.stats.vmalloc_max);
- }
-
- return 0;
-}
-
static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc,
struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- unsigned long offset;
- struct page *page;
- int i;
+ int i, pgoff;
+ struct scatterlist *s = memdesc->sg;
+ unsigned int offset;
- offset = (unsigned long) vmf->virtual_address - vma->vm_start;
+ offset = ((unsigned long) vmf->virtual_address - vma->vm_start);
- i = offset >> PAGE_SHIFT;
- page = sg_page(&memdesc->sg[i]);
- if (page == NULL)
+ if (offset >= memdesc->size)
return VM_FAULT_SIGBUS;
- get_page(page);
+ pgoff = offset >> PAGE_SHIFT;
- vmf->page = page;
- return 0;
+ /*
+ * The sglist might be comprised of mixed blocks of memory depending
+ * on how many 64K pages were allocated. This means we have to do math
+ * to find the actual 4K page to map in user space
+ */
+
+ for (i = 0; i < memdesc->sglen; i++) {
+ int npages = s->length >> PAGE_SHIFT;
+
+ if (pgoff < npages) {
+ struct page *page = sg_page(s);
+
+ page = nth_page(page, pgoff);
+
+ get_page(page);
+ vmf->page = page;
+
+ return 0;
+ }
+
+ pgoff -= npages;
+ s = sg_next(s);
+ }
+
+ return VM_FAULT_SIGBUS;
}
static int kgsl_page_alloc_vmflags(struct kgsl_memdesc *memdesc)
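The reworked fault handler has to translate a linear page offset into the right 4K page of a scatterlist whose entries may span several pages each. A userspace model of the same walk over an array of chunk lengths:

/* Userspace model of the vmfault walk: given a page offset into a
 * buffer made of variable-sized chunks, find which chunk holds it and
 * the page index inside that chunk.  Chunk sizes are made up. */
#include <stdio.h>

struct chunk {
	unsigned int npages;	/* length >> PAGE_SHIFT in the driver */
};

static int find_page(const struct chunk *c, int nchunks, unsigned int pgoff,
		     unsigned int *page_in_chunk)
{
	int i;

	for (i = 0; i < nchunks; i++) {
		if (pgoff < c[i].npages) {
			*page_in_chunk = pgoff;	/* nth_page() in the driver */
			return i;
		}
		pgoff -= c[i].npages;
	}
	return -1;	/* past the end: the driver returns VM_FAULT_SIGBUS */
}

int main(void)
{
	/* One 64K chunk (16 pages) followed by two 4K chunks. */
	struct chunk sg[] = { { 16 }, { 1 }, { 1 } };
	unsigned int off;
	int idx = find_page(sg, 3, 17, &off);

	printf("chunk %d, page %u\n", idx, off);
	return 0;
}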
@@ -403,8 +366,8 @@
struct scatterlist *sg;
int sglen = memdesc->sglen;
-
- if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE)
+ /* Don't free the guard page if it was used */
+ if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE)
sglen--;
kgsl_driver.stats.page_alloc -= memdesc->size;
@@ -415,12 +378,7 @@
}
if (memdesc->sg)
for_each_sg(memdesc->sg, sg, sglen, i)
- __free_page(sg_page(sg));
-
- if (memdesc->private)
- kgsl_process_sub_stats(memdesc->private, KGSL_MEM_ENTRY_PAGE_ALLOC, memdesc->size);
- else
- kgsl_driver.stats.page_alloc_kernel -= memdesc->size;
+ __free_pages(sg_page(sg), get_order(sg->length));
}
static int kgsl_contiguous_vmflags(struct kgsl_memdesc *memdesc)
@@ -428,29 +386,46 @@
return VM_RESERVED | VM_IO | VM_PFNMAP | VM_DONTEXPAND;
}
+/*
+ * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address
+ * space
+ *
+ * @memdesc - The memory descriptor which contains information about the memory
+ *
+ * Return: 0 on success else error code
+ */
static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc)
{
if (!memdesc->hostptr) {
pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
struct page **pages = NULL;
struct scatterlist *sg;
+ int npages = PAGE_ALIGN(memdesc->size) >> PAGE_SHIFT;
int sglen = memdesc->sglen;
- int i;
+ int i, count = 0;
-
- if (memdesc->flags & KGSL_MEMDESC_GUARD_PAGE)
+ /* Don't map the guard page if it exists */
+ if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE)
sglen--;
-
- pages = vmalloc(sglen * sizeof(struct page *));
+ /* create a list of pages to call vmap */
+ pages = vmalloc(npages * sizeof(struct page *));
if (!pages) {
KGSL_CORE_ERR("vmalloc(%d) failed\n",
- sglen * sizeof(struct page *));
+ npages * sizeof(struct page *));
return -ENOMEM;
}
- for_each_sg(memdesc->sg, sg, sglen, i)
- pages[i] = sg_page(sg);
- memdesc->hostptr = vmap(pages, sglen,
+
+ for_each_sg(memdesc->sg, sg, sglen, i) {
+ struct page *page = sg_page(sg);
+ int j;
+
+ for (j = 0; j < sg->length >> PAGE_SHIFT; j++)
+ pages[count++] = page++;
+ }
+
+
+ memdesc->hostptr = vmap(pages, count,
VM_IOREMAP, page_prot);
KGSL_STATS_ADD(memdesc->size, kgsl_driver.stats.vmalloc,
kgsl_driver.stats.vmalloc_max);
@@ -493,6 +468,20 @@
free_contiguous_memory_by_paddr(memdesc->physaddr);
}
+static int kgsl_ebimem_map_kernel(struct kgsl_memdesc *memdesc)
+{
+ if (!memdesc->hostptr) {
+ memdesc->hostptr = ioremap(memdesc->physaddr, memdesc->size);
+ if (!memdesc->hostptr) {
+ KGSL_CORE_ERR("ioremap failed, addr:0x%p, size:0x%x\n",
+ memdesc->hostptr, memdesc->size);
+ return -ENOMEM;
+ }
+ }
+
+ return 0;
+}
+
static void kgsl_coherent_free(struct kgsl_memdesc *memdesc)
{
kgsl_driver.stats.coherent -= memdesc->size;
@@ -500,6 +489,7 @@
memdesc->hostptr, memdesc->physaddr);
}
+/* Global - also used by kgsl_drm.c */
struct kgsl_memdesc_ops kgsl_page_alloc_ops = {
.free = kgsl_page_alloc_free,
.vmflags = kgsl_page_alloc_vmflags,
@@ -508,19 +498,11 @@
};
EXPORT_SYMBOL(kgsl_page_alloc_ops);
-struct kgsl_memdesc_ops kgsl_ion_alloc_ops = {
- .free = kgsl_ion_alloc_free,
- .vmflags = kgsl_ion_alloc_vmflags,
- .vmfault = kgsl_ion_alloc_vmfault,
- .map_kernel_mem = kgsl_ion_alloc_map_kernel,
-};
-EXPORT_SYMBOL(kgsl_ion_alloc_ops);
-
-
static struct kgsl_memdesc_ops kgsl_ebimem_ops = {
.free = kgsl_ebimem_free,
.vmflags = kgsl_contiguous_vmflags,
.vmfault = kgsl_contiguous_vmfault,
+ .map_kernel_mem = kgsl_ebimem_map_kernel,
};
static struct kgsl_memdesc_ops kgsl_coherent_ops = {
@@ -553,87 +535,158 @@
struct kgsl_pagetable *pagetable,
size_t size, unsigned int protflags)
{
- int i, order, ret = 0;
- int sglen = PAGE_ALIGN(size) / PAGE_SIZE;
+ int pcount = 0, order, ret = 0;
+ int j, len, page_size, sglen_alloc, sglen = 0;
struct page **pages = NULL;
pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
void *ptr;
+ unsigned int align;
+ align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
+
+ page_size = (align >= ilog2(SZ_64K) && size >= SZ_64K)
+ ? SZ_64K : PAGE_SIZE;
+ /* update align flags for what we actually use */
+ kgsl_memdesc_set_align(memdesc, ilog2(page_size));
+
+ /*
+ * There needs to be enough room in the sg structure to be able to
+ * service the allocation entirely with PAGE_SIZE sized chunks
+ */
+
+ sglen_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+ /*
+ * Add guard page to the end of the allocation when the
+ * IOMMU is in use.
+ */
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU)
- sglen++;
+ sglen_alloc++;
memdesc->size = size;
memdesc->pagetable = pagetable;
- memdesc->priv = KGSL_MEMFLAGS_CACHED;
memdesc->ops = &kgsl_page_alloc_ops;
- memdesc->sg = kgsl_sg_alloc(sglen);
+ memdesc->sg = kgsl_sg_alloc(sglen_alloc);
if (memdesc->sg == NULL) {
KGSL_CORE_ERR("vmalloc(%d) failed\n",
- sglen * sizeof(struct scatterlist));
+ sglen_alloc * sizeof(struct scatterlist));
ret = -ENOMEM;
goto done;
}
+ /*
+ * Allocate space to store the list of pages to send to vmap.
+ * This is an array of pointers so we can track 1024 pages per page of
+ * allocation which means we can handle up to a 8MB buffer request with
+ * two pages; well within the acceptable limits for using kmalloc.
+ */
- pages = kmalloc(sglen * sizeof(struct page *), GFP_KERNEL);
+ pages = kmalloc(sglen_alloc * sizeof(struct page *), GFP_KERNEL);
if (pages == NULL) {
KGSL_CORE_ERR("kmalloc (%d) failed\n",
- sglen * sizeof(struct page *));
+ sglen_alloc * sizeof(struct page *));
ret = -ENOMEM;
goto done;
}
kmemleak_not_leak(memdesc->sg);
- memdesc->sglen = sglen;
- sg_init_table(memdesc->sg, sglen);
+ memdesc->sglen_alloc = sglen_alloc;
+ sg_init_table(memdesc->sg, sglen_alloc);
- for (i = 0; i < PAGE_ALIGN(size) / PAGE_SIZE; i++) {
+ len = size;
+ while (len > 0) {
+ struct page *page;
+ unsigned int gfp_mask = GFP_KERNEL | __GFP_HIGHMEM |
+ __GFP_NOWARN;
+ int j;
- pages[i] = alloc_page(GFP_KERNEL | __GFP_HIGHMEM);
- if (pages[i] == NULL) {
+ /* don't waste space at the end of the allocation */
+ if (len < page_size)
+ page_size = PAGE_SIZE;
+
+ if (page_size != PAGE_SIZE)
+ gfp_mask |= __GFP_COMP;
+
+ page = alloc_pages(gfp_mask, get_order(page_size));
+
+ if (page == NULL) {
+ if (page_size != PAGE_SIZE) {
+ page_size = PAGE_SIZE;
+ continue;
+ }
+
+ KGSL_CORE_ERR(
+ "Out of memory: only allocated %dKB of %dKB requested\n",
+ (size - len) >> 10, size >> 10);
+
ret = -ENOMEM;
- memdesc->sglen = i;
goto done;
}
- sg_set_page(&memdesc->sg[i], pages[i], PAGE_SIZE, 0);
+ for (j = 0; j < page_size >> PAGE_SHIFT; j++)
+ pages[pcount++] = nth_page(page, j);
+
+ sg_set_page(&memdesc->sg[sglen++], page, page_size, 0);
+ len -= page_size;
}
-
+ /* Add the guard page to the end of the sglist */
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_IOMMU) {
+ /*
+ * It doesn't matter if we use GFP_ZERO here, this never
+ * gets mapped, and we only allocate it once in the life
+ * of the system
+ */
if (kgsl_guard_page == NULL)
kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO |
__GFP_HIGHMEM);
if (kgsl_guard_page != NULL) {
- sg_set_page(&memdesc->sg[sglen - 1], kgsl_guard_page,
+ sg_set_page(&memdesc->sg[sglen++], kgsl_guard_page,
PAGE_SIZE, 0);
- memdesc->flags |= KGSL_MEMDESC_GUARD_PAGE;
- } else
- memdesc->sglen--;
+ memdesc->priv |= KGSL_MEMDESC_GUARD_PAGE;
+ }
}
+ memdesc->sglen = sglen;
- ptr = vmap(pages, i, VM_IOREMAP, page_prot);
+ /*
+ * All memory that goes to the user has to be zeroed out before it gets
+ * exposed to userspace. This means that the memory has to be mapped in
+ * the kernel, zeroed (memset) and then unmapped. This also means that
+ * the dcache has to be flushed to ensure coherency between the kernel
+ * and user pages. We used to pass __GFP_ZERO to alloc_page, which mapped,
+ * zeroed and unmapped each individual page, and then we had to turn
+ * around and call flush_dcache_page() on that page to clear the caches.
+ * This was killing us for performance. Instead, we found it is much
+ * faster to allocate the pages without GFP_ZERO, map the entire range,
+ * memset it, flush the range and then unmap - this results in a factor
+ * of 4 improvement for speed for large buffers. There is a small
+ * increase in speed for small buffers, but only on the order of a few
+ * microseconds at best. The only downside is that there needs to be
+ * enough temporary space in vmalloc to accommodate the map. This
+ * shouldn't be a problem, but if it happens, fall back to a much slower
+ * path
+ */
+
+ ptr = vmap(pages, pcount, VM_IOREMAP, page_prot);
if (ptr != NULL) {
memset(ptr, 0, memdesc->size);
dmac_flush_range(ptr, ptr + memdesc->size);
vunmap(ptr);
} else {
- int j;
+ /* Very, very, very slow path */
-
-
- for (j = 0; j < i; j++) {
+ for (j = 0; j < pcount; j++) {
ptr = kmap_atomic(pages[j]);
memset(ptr, 0, PAGE_SIZE);
dmac_flush_range(ptr, ptr + PAGE_SIZE);
@@ -649,6 +702,9 @@
if (ret)
goto done;
+ KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc,
+ kgsl_driver.stats.page_alloc_max);
+
order = get_order(size);
if (order < 16)
@@ -657,9 +713,6 @@
done:
kfree(pages);
- KGSL_STATS_ADD(size, kgsl_driver.stats.page_alloc,
- kgsl_driver.stats.page_alloc_max);
-
if (ret)
kgsl_sharedmem_free(memdesc);
@@ -675,191 +728,35 @@
size = ALIGN(size, PAGE_SIZE * 2);
- kgsl_driver.stats.page_alloc_kernel += size;
ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
if (!ret)
ret = kgsl_page_alloc_map_kernel(memdesc);
- if (ret) {
-
- kgsl_driver.stats.page_alloc_kernel += size;
+ if (ret)
kgsl_sharedmem_free(memdesc);
- }
return ret;
}
EXPORT_SYMBOL(kgsl_sharedmem_page_alloc);
int
kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
- struct kgsl_process_private *private,
struct kgsl_pagetable *pagetable,
- size_t size, int flags)
+ size_t size)
{
unsigned int protflags;
- int ret = 0;
if (size == 0)
return -EINVAL;
protflags = GSL_PT_PAGE_RV;
- if (!(flags & KGSL_MEMFLAGS_GPUREADONLY))
+ if (!(memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY))
protflags |= GSL_PT_PAGE_WV;
- ret = _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
- protflags);
-
- if (ret == 0 && private)
- kgsl_process_add_stats(private, KGSL_MEM_ENTRY_PAGE_ALLOC, size);
-
- return ret;
+ return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size,
+ protflags);
}
EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user);
-static int
-_kgsl_sharedmem_ion_alloc(struct kgsl_memdesc *memdesc,
- struct kgsl_pagetable *pagetable,
- size_t size, unsigned int protflags)
-{
- int order, ret = 0;
- int sglen = 1;
- void *ptr;
- struct ion_handle *handle = NULL;
- ion_phys_addr_t pa = 0;
- size_t len = 0;
-
-
-
-
-
- memdesc->size = size;
- memdesc->pagetable = pagetable;
- memdesc->priv = KGSL_MEMFLAGS_CACHED;
- memdesc->ops = &kgsl_ion_alloc_ops;
-
- memdesc->sg = kgsl_sg_alloc(sglen);
-
- if (memdesc->sg == NULL) {
- KGSL_CORE_ERR("kgsl_sg_alloc vmalloc(%d) failed\n",
- sglen * sizeof(struct scatterlist));
- ret = -ENOMEM;
- goto done;
- }
-
- kmemleak_not_leak(memdesc->sg);
-
- memdesc->sglen = sglen;
- sg_init_table(memdesc->sg, sglen);
-
- if (kgsl_client == NULL)
- kgsl_client = msm_ion_client_create(-1, "KGSL");
-
- handle = ion_alloc(kgsl_client, size, SZ_4K, 0x1 << ION_SF_HEAP_ID);
- if (IS_ERR_OR_NULL(handle)) {
- ret = -ENOMEM;
- goto done;
- }
-
- if (ion_phys(kgsl_client, handle, &pa, &len)) {
- KGSL_CORE_ERR("kgsl: ion_phys() failed\n");
- ret = -ENOMEM;
- goto done;
- }
-
- memdesc->handle = handle;
-
- memdesc->sg[0].length = memdesc->size;
- memdesc->sg[0].offset = 0;
- memdesc->sg[0].dma_address = pa;
-
-
-
-
-
- ptr = ioremap(pa, memdesc->size);
-
- if (ptr != NULL) {
- memset(ptr, 0, memdesc->size);
- dmac_flush_range(ptr, ptr + memdesc->size);
- iounmap(ptr);
- }
-
- outer_cache_range_op_sg(memdesc->sg, memdesc->sglen, KGSL_CACHE_OP_FLUSH);
-
- ret = kgsl_mmu_map(pagetable, memdesc, protflags);
-
- if (ret) {
- KGSL_CORE_ERR("kgsl: kgsl_mmu_map failed\n");
- ret = -ENOMEM;
- goto done;
- }
-
- order = get_order(size);
-
- if (order < 16)
- kgsl_driver.stats.histogram[order]++;
-
-done:
- KGSL_STATS_ADD(size, kgsl_driver.stats.pre_alloc, kgsl_driver.stats.pre_alloc_max);
-
- if (ret)
- kgsl_sharedmem_free(memdesc);
-
- return ret;
-}
-
-int
-kgsl_sharedmem_ion_alloc(struct kgsl_memdesc *memdesc,
- struct kgsl_pagetable *pagetable,
- size_t size)
-{
- int ret;
-
- BUG_ON(size == 0);
- size = PAGE_ALIGN(size);
-
- kgsl_driver.stats.pre_alloc_kernel += size;
- ret = _kgsl_sharedmem_ion_alloc(memdesc, pagetable, size,
- GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
-
- if (!ret)
- ret = kgsl_ion_alloc_map_kernel(memdesc);
-
- if (ret) {
-
- kgsl_driver.stats.pre_alloc_kernel += size;
- kgsl_sharedmem_free(memdesc);
- }
- return ret;
-}
-EXPORT_SYMBOL(kgsl_sharedmem_ion_alloc);
-
-int
-kgsl_sharedmem_ion_alloc_user(struct kgsl_memdesc *memdesc,
- struct kgsl_process_private *private,
- struct kgsl_pagetable *pagetable,
- size_t size, int flags)
-{
- unsigned int protflags;
- int ret = 0;
-
- BUG_ON(size == 0);
-
- size = PAGE_ALIGN(size);
-
- protflags = GSL_PT_PAGE_RV;
- if (!(flags & KGSL_MEMFLAGS_GPUREADONLY))
- protflags |= GSL_PT_PAGE_WV;
-
- ret = _kgsl_sharedmem_ion_alloc(memdesc, pagetable, size,
- protflags);
-
- if (ret == 0 && private)
- kgsl_process_add_stats(private, KGSL_MEM_ENTRY_PRE_ALLOC, size);
-
- return ret;
-}
-EXPORT_SYMBOL(kgsl_sharedmem_ion_alloc_user);
-
int
kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size)
{
@@ -882,7 +779,7 @@
if (result)
goto err;
-
+ /* Record statistics */
KGSL_STATS_ADD(size, kgsl_driver.stats.coherent,
kgsl_driver.stats.coherent_max);
@@ -906,7 +803,7 @@
if (memdesc->ops && memdesc->ops->free)
memdesc->ops->free(memdesc);
- kgsl_sg_free(memdesc->sg, memdesc->sglen);
+ kgsl_sg_free(memdesc->sg, memdesc->sglen_alloc);
memset(memdesc, 0, sizeof(*memdesc));
}
@@ -953,7 +850,7 @@
int
kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable,
- size_t size, int flags)
+ size_t size)
{
size = ALIGN(size, PAGE_SIZE);
return _kgsl_sharedmem_ebimem(memdesc, pagetable, size);
@@ -1039,6 +936,15 @@
}
EXPORT_SYMBOL(kgsl_sharedmem_set);
+/*
+ * kgsl_sharedmem_map_vma - Map a user vma to physical memory
+ *
+ * @vma - The user vma to map
+ * @memdesc - The memory descriptor which contains information about the
+ * physical memory
+ *
+ * Return: 0 on success else error code
+ */
int
kgsl_sharedmem_map_vma(struct vm_area_struct *vma,
const struct kgsl_memdesc *memdesc)
@@ -1059,3 +965,42 @@
return 0;
}
EXPORT_SYMBOL(kgsl_sharedmem_map_vma);
+
+static const char * const memtype_str[] = {
+ [KGSL_MEMTYPE_OBJECTANY] = "any(0)",
+ [KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer",
+ [KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer",
+ [KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer",
+ [KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer",
+ [KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer",
+ [KGSL_MEMTYPE_TEXTURE] = "texture",
+ [KGSL_MEMTYPE_SURFACE] = "surface",
+ [KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface",
+ [KGSL_MEMTYPE_GL] = "gl",
+ [KGSL_MEMTYPE_CL] = "cl",
+ [KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map",
+ [KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap",
+ [KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map",
+ [KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap",
+ [KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack",
+ [KGSL_MEMTYPE_COMMAND] = "command",
+ [KGSL_MEMTYPE_2D] = "2d",
+ [KGSL_MEMTYPE_EGL_IMAGE] = "egl_image",
+ [KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow",
+ [KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample",
+ /* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */
+};
+
+void kgsl_get_memory_usage(char *name, size_t name_size, unsigned int memflags)
+{
+ unsigned char type;
+
+ type = (memflags & KGSL_MEMTYPE_MASK) >> KGSL_MEMTYPE_SHIFT;
+ if (type == KGSL_MEMTYPE_KERNEL)
+ strlcpy(name, "kernel", name_size);
+ else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL)
+ strlcpy(name, memtype_str[type], name_size);
+ else
+ snprintf(name, name_size, "unknown(%3d)", type);
+}
+EXPORT_SYMBOL(kgsl_get_memory_usage);
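For reference, a minimal sketch (not part of the patch) of how the memtype bits are meant to flow end to end, assuming the KGSL_MEMTYPE_* shift/mask macros from msm_kgsl.h:

/* Illustration only: pack a memtype into allocation flags, then decode it */
static void example_memtype_roundtrip(void)
{
	unsigned int flags = 0;
	char name[16];

	flags &= ~KGSL_MEMTYPE_MASK;
	flags |= (KGSL_MEMTYPE_TEXTURE << KGSL_MEMTYPE_SHIFT) & KGSL_MEMTYPE_MASK;

	kgsl_get_memory_usage(name, sizeof(name), flags);
	/* name now reads "texture"; unrecognized types decode as "unknown(nnn)" */
}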
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
index 9c7eb70..3109ef2 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.h
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,7 +19,8 @@
#include "kgsl_mmu.h"
#include <linux/slab.h>
#include <linux/kmemleak.h>
-#include <linux/sched.h>
+
+#include "kgsl_log.h"
struct kgsl_device;
struct kgsl_process_private;
@@ -28,33 +29,20 @@
#define KGSL_CACHE_OP_FLUSH 0x02
#define KGSL_CACHE_OP_CLEAN 0x03
-#define KGSL_MEMFLAGS_CACHED 0x00000001
-#define KGSL_MEMFLAGS_GLOBAL 0x00000002
-
extern struct kgsl_memdesc_ops kgsl_page_alloc_ops;
-int kgsl_sharedmem_ion_alloc(struct kgsl_memdesc *memdesc,
- struct kgsl_pagetable *pagetable, size_t size);
-
-int kgsl_sharedmem_ion_alloc_user(struct kgsl_memdesc *memdesc,
- struct kgsl_process_private *private,
- struct kgsl_pagetable *pagetable,
- size_t size, int flags);
-
-
int kgsl_sharedmem_page_alloc(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable, size_t size);
int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
- struct kgsl_process_private *private,
struct kgsl_pagetable *pagetable,
- size_t size, int flags);
+ size_t size);
int kgsl_sharedmem_alloc_coherent(struct kgsl_memdesc *memdesc, size_t size);
int kgsl_sharedmem_ebimem_user(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable,
- size_t size, int flags);
+ size_t size);
int kgsl_sharedmem_ebimem(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable,
@@ -82,8 +70,42 @@
int kgsl_sharedmem_init_sysfs(void);
void kgsl_sharedmem_uninit_sysfs(void);
+/*
+ * kgsl_memdesc_get_align - Get alignment flags from a memdesc
+ * @memdesc - the memdesc
+ *
+ * Returns the alignment requested, as power of 2 exponent.
+ */
+static inline int
+kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
+}
+
+/*
+ * kgsl_memdesc_set_align - Set alignment flags of a memdesc
+ * @memdesc - the memdesc
+ * @align - alignment requested, as a power of 2 exponent.
+ */
+static inline int
+kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align)
+{
+ if (align > 32) {
+ KGSL_CORE_ERR("Alignment too big, restricting to 2^32\n");
+ align = 32;
+ }
+
+ memdesc->flags &= ~KGSL_MEMALIGN_MASK;
+ memdesc->flags |= (align << KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK;
+ return 0;
+}
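For reference, a small worked example of the new alignment helpers (a sketch, not part of the patch), assuming SZ_64K and ilog2() as used by the allocator in kgsl_sharedmem.c:

/* Illustration only: request 64K alignment and read it back */
static void example_alignment(void)
{
	struct kgsl_memdesc md = { 0 };

	/* stores ilog2(SZ_64K) == 16 in md.flags */
	kgsl_memdesc_set_align(&md, ilog2(SZ_64K));

	/* _kgsl_sharedmem_page_alloc() uses this to decide the chunk size */
	if (kgsl_memdesc_get_align(&md) >= ilog2(SZ_64K))
		; /* the allocation may be built from SZ_64K chunks */
}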
+
static inline unsigned int kgsl_get_sg_pa(struct scatterlist *sg)
{
+ /*
+ * Try sg_dma_address first to support ion carveout
+ * regions which do not work with sg_phys().
+ */
unsigned int pa = sg_dma_address(sg);
if (pa == 0)
pa = sg_phys(sg);
@@ -94,6 +116,12 @@
kgsl_sharedmem_map_vma(struct vm_area_struct *vma,
const struct kgsl_memdesc *memdesc);
+/*
+ * For relatively small sglists, it is preferable to use kzalloc
+ * rather than going down the vmalloc rat hole. If the size of
+ * the sglist is < PAGE_SIZE use kzalloc, otherwise fall back to
+ * vmalloc
+ */
static inline void *kgsl_sg_alloc(unsigned int sglen)
{
@@ -116,7 +144,7 @@
unsigned int physaddr, unsigned int size)
{
memdesc->sg = kgsl_sg_alloc(1);
- if (memdesc->sg == NULL)
+ if (!memdesc->sg)
return -ENOMEM;
kmemleak_not_leak(memdesc->sg);
@@ -133,37 +161,25 @@
kgsl_allocate(struct kgsl_memdesc *memdesc,
struct kgsl_pagetable *pagetable, size_t size)
{
- int ret = 1;
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
return kgsl_sharedmem_ebimem(memdesc, pagetable, size);
-
- if(size >= SZ_4M)
- ret = kgsl_sharedmem_ion_alloc(memdesc, pagetable, size);
-
- if(ret)
- return kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
- return ret;
+ memdesc->flags |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
+ return kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
}
static inline int
kgsl_allocate_user(struct kgsl_memdesc *memdesc,
- struct kgsl_process_private *private,
struct kgsl_pagetable *pagetable,
size_t size, unsigned int flags)
{
- int ret = 1;
- char task_comm[TASK_COMM_LEN];
+ int ret;
+
+ memdesc->flags = flags;
if (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE)
- return kgsl_sharedmem_ebimem_user(memdesc, pagetable, size,
- flags);
- if(size >= SZ_4M)
- ret = kgsl_sharedmem_ion_alloc_user(memdesc, private, pagetable, size, flags);
- else if ( size >= SZ_1M && strcmp("om.htc.launcher", get_task_comm(task_comm, current->group_leader)) == 0 )
- ret = kgsl_sharedmem_ion_alloc_user(memdesc, private, pagetable, size, flags);
-
- if(ret)
- return kgsl_sharedmem_page_alloc_user(memdesc, private, pagetable, size, flags);
+ ret = kgsl_sharedmem_ebimem_user(memdesc, pagetable, size);
+ else
+ ret = kgsl_sharedmem_page_alloc_user(memdesc, pagetable, size);
return ret;
}
@@ -174,6 +190,8 @@
int ret = kgsl_sharedmem_alloc_coherent(memdesc, size);
if (!ret && (kgsl_mmu_get_mmutype() == KGSL_MMU_TYPE_NONE))
memdesc->gpuaddr = memdesc->physaddr;
+
+ memdesc->flags |= (KGSL_MEMTYPE_KERNEL << KGSL_MEMTYPE_SHIFT);
return ret;
}
@@ -188,4 +206,4 @@
return size;
}
-#endif
+#endif /* __KGSL_SHAREDMEM_H */
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index 9704e2b..a5aa42f 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -23,6 +23,7 @@
#include "kgsl_sharedmem.h"
#include "kgsl_snapshot.h"
+/* Placeholder for the list of memory objects frozen after a hang */
struct kgsl_snapshot_object {
unsigned int gpuaddr;
@@ -35,10 +36,10 @@
};
struct snapshot_obj_itr {
- void *buf;
- int pos;
- loff_t offset;
- size_t remain;
+ void *buf; /* Buffer pointer to write to */
+ int pos; /* Current position in the sequence */
+ loff_t offset; /* file offset to start writing from */
+ size_t remain; /* Bytes remaining in buffer */
size_t write; /* Bytes written so far */
};
@@ -60,13 +61,13 @@
if ((itr->pos + size) <= itr->offset)
goto done;
-
+ /* Handle the case that offset is in the middle of the buffer */
if (itr->offset > itr->pos) {
src += (itr->offset - itr->pos);
size -= (itr->offset - itr->pos);
-
+ /* Advance pos to the offset start */
itr->pos = itr->offset;
}
@@ -84,6 +85,7 @@
return size;
}
+/* idr_for_each function to count the number of contexts */
static int snapshot_context_count(int id, void *ptr, void *data)
{
@@ -93,6 +95,10 @@
return 0;
}
+/*
+ * To simplify the iterator loop use a global pointer instead of trying
+ * to pass around double star references to the snapshot data
+ */
static void *_ctxtptr;
@@ -104,6 +110,9 @@
header->id = id;
+ /* Future-proof for per-context timestamps - for now, just
+ * return the global timestamp for all contexts
+ */
header->timestamp_queued = kgsl_readtimestamp(device, context,
KGSL_TIMESTAMP_QUEUED);
@@ -115,6 +124,7 @@
return 0;
}
+/* Snapshot the Linux specific information */
static int snapshot_os(struct kgsl_device *device,
void *snapshot, int remain, void *priv)
{
@@ -126,12 +136,14 @@
int ctxtcount = 0;
int size = sizeof(*header);
+ /* Figure out how many active contexts there are - these will
+ * be appended on the end of the structure */
idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context);
-
+ /* Make sure there is enough room for the data */
if (remain < size) {
SNAPSHOT_ERR_NOMEM(device, "OS");
return 0;
@@ -143,27 +155,28 @@
header->state = hang ? SNAPSHOT_STATE_HUNG : SNAPSHOT_STATE_RUNNING;
-
+ /* Get the kernel build information */
strlcpy(header->release, utsname()->release, sizeof(header->release));
strlcpy(header->version, utsname()->version, sizeof(header->version));
-
+ /* Get the Unix time for the timestamp */
header->seconds = get_seconds();
-
+ /* Remember the power information */
header->power_flags = pwr->power_flags;
header->power_level = pwr->active_pwrlevel;
header->power_interval_timeout = pwr->interval_timeout;
header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
header->busclk = kgsl_get_clkrate(pwr->ebi1_clk);
-
+ /* Future proof for per-context timestamps */
header->current_context = -1;
-
+ /* Get the current PT base */
header->ptbase = kgsl_mmu_get_current_ptbase(&device->mmu);
-
- pid = header->pid = kgsl_mmu_get_ptname_from_ptbase(header->ptbase);
+ /* And the PID for the task leader */
+ pid = header->pid = kgsl_mmu_get_ptname_from_ptbase(&device->mmu,
+ header->ptbase);
task = find_task_by_vpid(pid);
@@ -172,13 +185,23 @@
header->ctxtcount = ctxtcount;
-
+ /* append information for each context */
_ctxtptr = snapshot + sizeof(*header);
idr_for_each(&device->context_idr, snapshot_context_info, NULL);
-
+ /* Return the size of the data segment */
return size;
}
+/*
+ * kgsl_snapshot_dump_indexed_regs - helper function to dump indexed registers
+ * @device - the device to dump registers from
+ * @snapshot - pointer to the start of the region of memory for the snapshot
+ * @remain - a pointer to the number of bytes remaining in the snapshot
+ * @priv - A pointer to the kgsl_snapshot_indexed_registers data
+ *
+ * Given a indexed register cmd/data pair and a count, dump each indexed
+ * register
+ */
static int kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device,
void *snapshot, int remain, void *priv)
@@ -220,6 +243,10 @@
sect.magic = SNAPSHOT_SECTION_MAGIC;
sect.id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT;
+ /*
+ * Header size is in dwords, object size is in bytes -
+ * round up if the object size isn't dword aligned
+ */
sect.size = GPU_OBJ_HEADER_SZ + ALIGN(obj->size, 4);
@@ -241,7 +268,7 @@
if (ret == 0)
return 0;
-
+ /* Pad the end to a dword boundary if we need to */
if (obj->size % 4) {
unsigned int dummy = 0;
@@ -262,25 +289,15 @@
kfree(obj);
}
-#if 0
-int kgsl_snapshot_have_object(struct kgsl_device *device, unsigned int ptbase,
- unsigned int gpuaddr, unsigned int size)
-{
- struct kgsl_snapshot_object *obj;
-
- list_for_each_entry(obj, &device->snapshot_obj_list, node) {
- if (obj->ptbase != ptbase)
- continue;
-
- if ((gpuaddr >= obj->gpuaddr) &&
- ((gpuaddr + size) <= (obj->gpuaddr + obj->size)))
- return 1;
- }
-
- return 0;
-}
-#endif
-
+/* kgsl_snapshot_have_object - Return 1 if the object has been processed
+ * @device - the device that is being snapshotted
+ * @ptbase - the pagetable base of the object to freeze
+ * @gpuaddr - The gpu address of the object to freeze
+ * @size - the size of the object (may not always be the size of the region)
+ *
+ * Return 1 if the object is already in the list - this can save us from
+ * having to parse the same thing over again.
+ */
int kgsl_snapshot_have_object(struct kgsl_device *device, unsigned int ptbase,
unsigned int gpuaddr, unsigned int size)
{
@@ -298,6 +315,17 @@
return 0;
}
+/* kgsl_snapshot_get_object - Mark a GPU buffer to be frozen
+ * @device - the device that is being snapshotted
+ * @ptbase - the pagetable base of the object to freeze
+ * @gpuaddr - The gpu address of the object to freeze
+ * @size - the size of the object (may not always be the size of the region)
+ * @type - the type of object being saved (shader, vbo, etc)
+ *
+ * Mark and freeze a GPU buffer object. This will prevent it from being
+ * freed until it can be copied out as part of the snapshot dump. Returns the
+ * size of the object being frozen
+ */
int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase,
unsigned int gpuaddr, unsigned int size, unsigned int type)
@@ -306,27 +334,33 @@
struct kgsl_snapshot_object *obj;
int offset;
- entry = kgsl_get_mem_entry(ptbase, gpuaddr, size);
+ entry = kgsl_get_mem_entry(device, ptbase, gpuaddr, size);
if (entry == NULL) {
KGSL_DRV_ERR(device, "Unable to find GPU buffer %8.8X\n",
gpuaddr);
- return 0;
+ return -EINVAL;
}
-
+ /* We can't freeze external memory, because we don't own it */
if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) {
KGSL_DRV_ERR(device,
"Only internal GPU buffers can be frozen\n");
- return 0;
+ return -EINVAL;
}
+ /*
+ * size indicates the number of bytes in the region to save. This might
+ * not always be the entire size of the region because some buffers are
+ * sub-allocated from a larger region. However, if size 0 was passed
+ * thats a flag that the caller wants to capture the entire buffer
+ */
if (size == 0) {
size = entry->memdesc.size;
offset = 0;
-
+ /* Adjust the gpuaddr to the start of the object */
gpuaddr = entry->memdesc.gpuaddr;
} else {
offset = gpuaddr - entry->memdesc.gpuaddr;
@@ -335,13 +369,13 @@
if (size + offset > entry->memdesc.size) {
KGSL_DRV_ERR(device, "Invalid size for GPU buffer %8.8X\n",
gpuaddr);
- return 0;
+ return -EINVAL;
}
-
+ /* If the buffer is already on the list, skip it */
list_for_each_entry(obj, &device->snapshot_obj_list, node) {
if (obj->gpuaddr == gpuaddr && obj->ptbase == ptbase) {
-
+ /* If the size is different, use the new size */
if (obj->size != size)
obj->size = size;
@@ -352,17 +386,17 @@
if (kgsl_memdesc_map(&entry->memdesc) == NULL) {
KGSL_DRV_ERR(device, "Unable to map GPU buffer %X\n",
gpuaddr);
- return 0;
+ return -EINVAL;
}
obj = kzalloc(sizeof(*obj), GFP_KERNEL);
if (obj == NULL) {
KGSL_DRV_ERR(device, "Unable to allocate memory\n");
- return 0;
+ return -EINVAL;
}
-
+ /* Ref count the mem entry */
kgsl_mem_entry_get(entry);
obj->type = type;
@@ -374,6 +408,13 @@
list_add(&obj->node, &device->snapshot_obj_list);
+ /*
+ * Return the size of the entire mem entry that was frozen - this gets
+ * used for tracking how much memory is frozen for a hang. Also, mark
+ * the memory entry as frozen. If the entry was already marked as
+ * frozen, then another buffer already got to it. In that case, return
+ * 0 so it doesn't get counted twice
+ */
if (entry->flags & KGSL_MEM_ENTRY_FROZEN)
return 0;
@@ -384,21 +425,39 @@
}
EXPORT_SYMBOL(kgsl_snapshot_get_object);
+/*
+ * kgsl_snapshot_dump_regs - helper function to dump device registers
+ * @device - the device to dump registers from
+ * @snapshot - pointer to the start of the region of memory for the snapshot
+ * @remain - a pointer to the number of bytes remaining in the snapshot
+ * @priv - A pointer to the kgsl_snapshot_registers data
+ *
+ * Given an array of register ranges pairs (start,end [inclusive]), dump the
+ * registers into a snapshot register section. The snapshot region stores a
+ * pair of dwords for each register - the word address of the register, and
+ * the value.
+ */
int kgsl_snapshot_dump_regs(struct kgsl_device *device, void *snapshot,
int remain, void *priv)
{
+ struct kgsl_snapshot_registers_list *list = priv;
+
struct kgsl_snapshot_regs *header = snapshot;
- struct kgsl_snapshot_registers *regs = priv;
+ struct kgsl_snapshot_registers *regs;
unsigned int *data = snapshot + sizeof(*header);
- int count = 0, i, j;
+ int count = 0, i, j, k;
-
+ /* Figure out how many registers we are going to dump */
- for (i = 0; i < regs->count; i++) {
- int start = regs->regs[i * 2];
- int end = regs->regs[i * 2 + 1];
+ for (i = 0; i < list->count; i++) {
+ regs = &(list->registers[i]);
- count += (end - start + 1);
+ for (j = 0; j < regs->count; j++) {
+ int start = regs->regs[j * 2];
+ int end = regs->regs[j * 2 + 1];
+
+ count += (end - start + 1);
+ }
}
if (remain < (count * 8) + sizeof(*header)) {
@@ -406,22 +465,26 @@
return 0;
}
- for (i = 0; i < regs->count; i++) {
- unsigned int start = regs->regs[i * 2];
- unsigned int end = regs->regs[i * 2 + 1];
- for (j = start; j <= end; j++) {
- unsigned int val;
+ for (i = 0; i < list->count; i++) {
+ regs = &(list->registers[i]);
+ for (j = 0; j < regs->count; j++) {
+ unsigned int start = regs->regs[j * 2];
+ unsigned int end = regs->regs[j * 2 + 1];
- kgsl_regread(device, j, &val);
- *data++ = j;
- *data++ = val;
+ for (k = start; k <= end; k++) {
+ unsigned int val;
+
+ kgsl_regread(device, k, &val);
+ *data++ = k;
+ *data++ = val;
+ }
}
}
header->count = count;
-
+ /* Return the size of the section */
return (count * 8) + sizeof(*header);
}
EXPORT_SYMBOL(kgsl_snapshot_dump_regs);
@@ -443,15 +506,28 @@
}
EXPORT_SYMBOL(kgsl_snapshot_indexed_registers);
+/*
+ * kgsl_snapshot - construct a device snapshot
+ * @device - device to snapshot
+ * @hang - set to 1 if the snapshot was triggered following a hang
+ * Given a device, construct a binary snapshot dump of the current device state
+ * and store it in the device snapshot memory.
+ */
int kgsl_device_snapshot(struct kgsl_device *device, int hang)
{
struct kgsl_snapshot_header *header = device->snapshot;
int remain = device->snapshot_maxsize - sizeof(*header);
void *snapshot;
- struct platform_device *pdev =
- container_of(device->parentdev, struct platform_device, dev);
- struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
+ struct timespec boot;
+ /*
+ * The first hang is always the one we are interested in. To
+ * avoid a subsequent hang blowing away the first, the snapshot
+ * is frozen until it is dumped via sysfs.
+ *
+ * Note that triggered snapshots are always taken regardless
+ * of the state and never frozen.
+ */
if (hang && device->snapshot_frozen == 1)
return 0;
@@ -472,34 +548,40 @@
header->gpuid = kgsl_gpuid(device, &header->chipid);
-
+ /* Get a pointer to the first section (right after the header) */
snapshot = ((void *) device->snapshot) + sizeof(*header);
-
+ /* Build the Linux specific header */
snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
snapshot, &remain, snapshot_os, (void *) hang);
-
+ /* Get the device specific sections */
if (device->ftbl->snapshot)
snapshot = device->ftbl->snapshot(device, snapshot, &remain,
hang);
- device->snapshot_timestamp = get_seconds();
+ /*
+ * The timestamp is the seconds since boot so it is easier to match to
+ * the kernel log
+ */
+
+ getboottime(&boot);
+ device->snapshot_timestamp = get_seconds() - boot.tv_sec;
device->snapshot_size = (int) (snapshot - device->snapshot);
-
+ /* Freeze the snapshot on a hang until it gets read */
device->snapshot_frozen = (hang) ? 1 : 0;
-
- KGSL_DRV_ERR(device,"snapshot created at va %p pa %x size %d\n",
- device->snapshot, pdata->snapshot_address,
- device->snapshot_size);
+ /* log buffer info to aid in ramdump fault tolerance */
+ KGSL_DRV_ERR(device, "snapshot created at pa %lx size %d\n",
+ __pa(device->snapshot), device->snapshot_size);
if (hang)
sysfs_notify(&device->snapshot_kobj, NULL, "timestamp");
return 0;
}
EXPORT_SYMBOL(kgsl_device_snapshot);
+/* An attribute for showing snapshot details */
struct kgsl_snapshot_attribute {
struct attribute attr;
ssize_t (*show)(struct kgsl_device *device, char *buf);
@@ -513,6 +595,7 @@
#define kobj_to_device(a) \
container_of(a, struct kgsl_device, snapshot_kobj)
+/* Dump the sysfs binary data to the user */
static ssize_t snapshot_show(struct file *filep, struct kobject *kobj,
struct bin_attribute *attr, char *buf, loff_t off,
size_t count)
@@ -526,11 +609,11 @@
if (device == NULL)
return 0;
-
+ /* Return nothing if we haven't taken a snapshot yet */
if (device->snapshot_timestamp == 0)
return 0;
-
+ /* Get the mutex to keep things from changing while we are dumping */
mutex_lock(&device->mutex);
obj_itr_init(&itr, buf, off, count);
@@ -575,11 +658,13 @@
return itr.write;
}
+/* Show the timestamp of the last collected snapshot */
static ssize_t timestamp_show(struct kgsl_device *device, char *buf)
{
- return snprintf(buf, PAGE_SIZE, "%x\n", device->snapshot_timestamp);
+ return snprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_timestamp);
}
+/* manually trigger a new snapshot to be collected */
static ssize_t trigger_store(struct kgsl_device *device, const char *buf,
size_t count)
{
@@ -592,23 +677,6 @@
return count;
}
-static ssize_t no_panic_show(struct kgsl_device *device, char *buf)
-{
- return snprintf(buf, PAGE_SIZE, "%x\n", device->snapshot_no_panic);
-}
-
-static ssize_t no_panic_store(struct kgsl_device *device, const char *buf,
- size_t count)
-{
- if (device && count > 0) {
- mutex_lock(&device->mutex);
- device->snapshot_no_panic = simple_strtol(buf, NULL, 10);
- mutex_unlock(&device->mutex);
- }
-
- return count;
-}
-
static struct bin_attribute snapshot_attr = {
.attr.name = "dump",
.attr.mode = 0444,
@@ -625,7 +693,6 @@
SNAPSHOT_ATTR(trigger, 0600, NULL, trigger_store);
SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL);
-SNAPSHOT_ATTR(no_panic, 0644, no_panic_show, no_panic_store);
static void snapshot_sysfs_release(struct kobject *kobj)
{
@@ -672,21 +739,19 @@
.release = snapshot_sysfs_release,
};
+/* kgsl_device_snapshot_init - Add resources for the device GPU snapshot
+ * @device - The device to initialize
+ *
+ * Allocate memory for a GPU snapshot for the specified device,
+ * and create the sysfs files to manage it
+ */
int kgsl_device_snapshot_init(struct kgsl_device *device)
{
int ret;
- struct platform_device *pdev =
- container_of(device->parentdev, struct platform_device, dev);
- struct kgsl_device_platform_data *pdata = pdev->dev.platform_data;
- if (device->snapshot == NULL) {
- if(pdata->snapshot_address) {
- device->snapshot = ioremap(pdata->snapshot_address, KGSL_SNAPSHOT_MEMSIZE);
- KGSL_DRV_INFO(device, "snapshot created at va %p pa %x\n", device->snapshot, pdata->snapshot_address);
- } else
- device->snapshot = kzalloc(KGSL_SNAPSHOT_MEMSIZE, GFP_KERNEL);
- }
+ if (device->snapshot == NULL)
+ device->snapshot = kzalloc(KGSL_SNAPSHOT_MEMSIZE, GFP_KERNEL);
if (device->snapshot == NULL)
return -ENOMEM;
@@ -710,16 +775,18 @@
goto done;
ret = sysfs_create_file(&device->snapshot_kobj, &attr_timestamp.attr);
- if (ret)
- goto done;
-
- ret = sysfs_create_file(&device->snapshot_kobj, &attr_no_panic.attr);
done:
return ret;
}
EXPORT_SYMBOL(kgsl_device_snapshot_init);
+/* kgsl_device_snapshot_close - Take down snapshot memory for a device
+ * @device - Pointer to the kgsl_device
+ *
+ * Remove the sysfs files and free the memory allocated for the GPU
+ * snapshot
+ */
void kgsl_device_snapshot_close(struct kgsl_device *device)
{
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
index baee17d..327d18a 100644
--- a/drivers/gpu/msm/kgsl_snapshot.h
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -16,25 +16,33 @@
#include <linux/types.h>
+/* Snapshot header */
+/* High word is static, low word is snapshot version ID */
#define SNAPSHOT_MAGIC 0x504D0002
+/* GPU ID scheme:
+ * [16:31] - core identifier (0x0002 for 2D or 0x0003 for 3D)
+ * [00:15] - GPU specific identifier
+ */
struct kgsl_snapshot_header {
- __u32 magic;
- __u32 gpuid;
-
- __u32 chipid;
+ __u32 magic; /* Magic identifier */
+ __u32 gpuid; /* GPU ID - see above */
+ /* Added in snapshot version 2 */
+ __u32 chipid; /* Chip ID from the GPU */
} __packed;
+/* Section header */
#define SNAPSHOT_SECTION_MAGIC 0xABCD
struct kgsl_snapshot_section_header {
- __u16 magic;
- __u16 id;
- __u32 size;
+ __u16 magic; /* Magic identifier */
+ __u16 id; /* Type of section */
+ __u32 size; /* Size of the section including this header */
} __packed;
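For reference, a hedged host-side sketch of how a post-processing tool might walk a snapshot dump using the headers above; walk_snapshot() is a hypothetical helper, and the section id constants are the ones defined just below:

static void walk_snapshot(const char *buf, size_t len)
{
	const struct kgsl_snapshot_header *h = (const void *) buf;
	const char *ptr = buf + sizeof(*h);

	if (len < sizeof(*h) || h->magic != SNAPSHOT_MAGIC)
		return;

	/* High word of gpuid is the core type, low word the specific part */

	while (ptr + sizeof(struct kgsl_snapshot_section_header) <= buf + len) {
		const struct kgsl_snapshot_section_header *s =
			(const void *) ptr;

		if (s->magic != SNAPSHOT_SECTION_MAGIC ||
		    s->id == KGSL_SNAPSHOT_SECTION_END ||
		    s->size < sizeof(*s))
			break;

		/* s->size includes this header, so it steps to the next section */
		ptr += s->size;
	}
}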
+/* Section identifiers */
#define KGSL_SNAPSHOT_SECTION_OS 0x0101
#define KGSL_SNAPSHOT_SECTION_REGS 0x0201
#define KGSL_SNAPSHOT_SECTION_RB 0x0301
@@ -47,89 +55,105 @@
#define KGSL_SNAPSHOT_SECTION_END 0xFFFF
+/* OS sub-section header */
#define KGSL_SNAPSHOT_OS_LINUX 0x0001
+/* Linux OS specific information */
#define SNAPSHOT_STATE_HUNG 0
#define SNAPSHOT_STATE_RUNNING 1
struct kgsl_snapshot_linux {
- int osid;
- int state;
- __u32 seconds;
- __u32 power_flags;
- __u32 power_level;
- __u32 power_interval_timeout;
- __u32 grpclk;
- __u32 busclk;
- __u32 ptbase;
- __u32 pid;
- __u32 current_context;
- __u32 ctxtcount;
- unsigned char release[32];
- unsigned char version[32];
- unsigned char comm[16];
+ int osid; /* subsection OS identifier */
+ int state; /* 1 if the thread is running, 0 for hung */
+ __u32 seconds; /* Unix timestamp for the snapshot */
+ __u32 power_flags; /* Current power flags */
+ __u32 power_level; /* Current power level */
+ __u32 power_interval_timeout; /* Power interval timeout */
+ __u32 grpclk; /* Current GP clock value */
+ __u32 busclk; /* Current busclk value */
+ __u32 ptbase; /* Current ptbase */
+ __u32 pid; /* PID of the process that owns the PT */
+ __u32 current_context; /* ID of the current context */
+ __u32 ctxtcount; /* Number of contexts appended to section */
+ unsigned char release[32]; /* kernel release */
+ unsigned char version[32]; /* kernel version */
+ unsigned char comm[16]; /* Name of the process that owns the PT */
} __packed;
+/*
+ * This structure contains a record of an active context.
+ * These are appended one after another in the OS section below
+ * the header above
+ */
struct kgsl_snapshot_linux_context {
- __u32 id;
- __u32 timestamp_queued;
- __u32 timestamp_retired;
+ __u32 id; /* The context ID */
+ __u32 timestamp_queued; /* The last queued timestamp */
+ __u32 timestamp_retired; /* The last timestamp retired by HW */
};
+/* Ringbuffer sub-section header */
struct kgsl_snapshot_rb {
- int start;
- int end;
- int rbsize;
- int wptr;
- int rptr;
- int count;
+ int start; /* dword at the start of the dump */
+ int end; /* dword at the end of the dump */
+ int rbsize; /* Size (in dwords) of the ringbuffer */
+ int wptr; /* Current index of the CPU write pointer */
+ int rptr; /* Current index of the GPU read pointer */
+ int count; /* Number of dwords in the dump */
} __packed;
+/* Indirect buffer sub-section header */
struct kgsl_snapshot_ib {
- __u32 gpuaddr;
- __u32 ptbase;
- int size;
+ __u32 gpuaddr; /* GPU address of the IB */
+ __u32 ptbase; /* Base for the pagetable the GPU address is valid in */
+ int size; /* Size of the IB */
} __packed;
+/* Register sub-section header */
struct kgsl_snapshot_regs {
- __u32 count;
+ __u32 count; /* Number of register pairs in the section */
} __packed;
+/* Indexed register sub-section header */
struct kgsl_snapshot_indexed_regs {
- __u32 index_reg;
- __u32 data_reg;
- int start;
- int count;
+ __u32 index_reg; /* Offset of the index register for this section */
+ __u32 data_reg; /* Offset of the data register for this section */
+ int start; /* Starting index */
+ int count; /* Number of dwords in the data */
} __packed;
+/* Istore sub-section header */
struct kgsl_snapshot_istore {
- int count;
+ int count; /* Number of instructions in the istore */
} __packed;
+/* Debug data sub-section header */
+/* A2XX debug sections */
#define SNAPSHOT_DEBUG_SX 1
#define SNAPSHOT_DEBUG_CP 2
#define SNAPSHOT_DEBUG_SQ 3
#define SNAPSHOT_DEBUG_SQTHREAD 4
#define SNAPSHOT_DEBUG_MIU 5
+/* A3XX debug sections */
#define SNAPSHOT_DEBUG_VPC_MEMORY 6
#define SNAPSHOT_DEBUG_CP_MEQ 7
#define SNAPSHOT_DEBUG_CP_PM4_RAM 8
#define SNAPSHOT_DEBUG_CP_PFP_RAM 9
#define SNAPSHOT_DEBUG_CP_ROQ 10
#define SNAPSHOT_DEBUG_SHADER_MEMORY 11
+#define SNAPSHOT_DEBUG_CP_MERCIU 12
struct kgsl_snapshot_debug {
- int type;
- int size;
+ int type; /* Type identifier for the attached data */
+ int size; /* Size of the section in dwords */
} __packed;
struct kgsl_snapshot_debugbus {
- int id;
- int count;
+ int id; /* Debug bus ID */
+ int count; /* Number of dwords in the dump */
} __packed;
#define SNAPSHOT_GPU_OBJECT_SHADER 1
@@ -137,22 +161,40 @@
#define SNAPSHOT_GPU_OBJECT_GENERIC 3
struct kgsl_snapshot_gpu_object {
- int type;
- __u32 gpuaddr;
- __u32 ptbase;
- int size;
+ int type; /* Type of GPU object */
+ __u32 gpuaddr; /* GPU address of the object */
+ __u32 ptbase; /* Base for the pagetable the GPU address is valid in */
+ int size; /* Size of the object (in dwords) */
};
#ifdef __KERNEL__
+/* Allocate 512K for each device snapshot */
#define KGSL_SNAPSHOT_MEMSIZE (512 * 1024)
struct kgsl_device;
+/*
+ * A helper macro to print out "not enough memory" messages - this
+ * makes it easy to standardize the messages as well as cut down on
+ * the number of strings in the binary
+ */
#define SNAPSHOT_ERR_NOMEM(_d, _s) \
KGSL_DRV_ERR((_d), \
"snapshot: not enough snapshot memory for section %s\n", (_s))
+/*
+ * kgsl_snapshot_add_section - Add a new section to the GPU snapshot
+ * @device - the KGSL device being snapshotted
+ * @id - the section id
+ * @snapshot - pointer to the memory for the snapshot
+ * @remain - pointer to the number of bytes left in the snapshot region
+ * @func - Function pointer to fill the section
+ * @priv - Priv pointer to pass to the function
+ *
+ * Set up a KGSL snapshot section by calling the callback function to fill
+ * in the data and then adding the standard section header
+ */
static inline void *kgsl_snapshot_add_section(struct kgsl_device *device,
u16 id, void *snapshot, int *remain,
@@ -162,15 +204,25 @@
void *data = snapshot + sizeof(*header);
int ret = 0;
+ /*
+ * Sanity check to make sure there is enough for the header. The
+ * callback will check to make sure there is enough for the rest
+ * of the data. If there isn't enough room then don't advance the
+ * pointer.
+ */
if (*remain < sizeof(*header))
return snapshot;
-
+ /* It is legal to have no function (i.e. - make an empty section) */
if (func) {
ret = func(device, data, *remain, priv);
+ /*
+ * If there wasn't enough room for the data then don't bother
+ * setting up the header.
+ */
if (ret == 0)
return snapshot;
@@ -180,34 +232,82 @@
header->id = id;
header->size = ret + sizeof(*header);
-
+ /* Decrement the room left in the snapshot region */
*remain -= header->size;
-
+ /* Advance the pointer past the section that was just written */
return snapshot + header->size;
}
+/* A common helper function to dump a range of registers. This will be used in
+ * the GPU specific devices like this:
+ *
+ * struct kgsl_snapshot_registers_list list;
+ * struct kgsl_snapshot_registers priv[2];
+ *
+ * priv[0].regs = registers_array;
+ * priv[0].count = num_registers;
+ * priv[1].regs = registers_array_new;
+ * priv[1].count = num_registers_new;
+ *
+ * list.registers = priv;
+ * list.count = 2;
+ *
+ * kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+ * remain, kgsl_snapshot_dump_regs, &list).
+ *
+ * Pass in a struct pointing to a list of register definitions as described
+ * below:
+ *
+ * Pass in an array of register range pairs in the form of:
+ * start reg, stop reg
+ * All the registers between start and stop inclusive will be dumped
+ */
struct kgsl_snapshot_registers {
- unsigned int *regs;
- int count;
+ unsigned int *regs; /* Pointer to the array of register ranges */
+ int count; /* Number of entries in the array */
+};
+
+struct kgsl_snapshot_registers_list {
+ /* Pointer to an array of register lists */
+ struct kgsl_snapshot_registers *registers;
+ /* Number of registers lists in the array */
+ int count;
};
int kgsl_snapshot_dump_regs(struct kgsl_device *device, void *snapshot,
int remain, void *priv);
+/*
+ * A common helper function to dump a set of indexed registers. Use it
+ * like this:
+ *
+ * struct kgsl_snapshot_indexed_registers priv;
+ * priv.index = REG_INDEX;
+ * priv.data = REG_DATA;
+ * priv.count = num_registers
+ *
+ * kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS,
+ * snapshot, remain, kgsl_snapshot_dump_indexed_regs, &priv).
+ *
+ * The callback function will write an index from 0 to priv.count to
+ * the index register and read the data from the data register.
+ */
struct kgsl_snapshot_indexed_registers {
- unsigned int index;
- unsigned int data;
- unsigned int start;
- unsigned int count;
+ unsigned int index; /* Offset of the index register */
+ unsigned int data; /* Offset of the data register */
+ unsigned int start; /* Index to start with */
+ unsigned int count; /* Number of values to read from the pair */
};
+/* Helper function to snapshot a section of indexed registers */
void *kgsl_snapshot_indexed_registers(struct kgsl_device *device,
void *snapshot, int *remain, unsigned int index,
unsigned int data, unsigned int start, unsigned int count);
+/* Freeze a GPU buffer so it can be dumped in the snapshot */
int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase,
unsigned int gpuaddr, unsigned int size, unsigned int type);
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
new file mode 100644
index 0000000..d9ab081
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -0,0 +1,218 @@
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/file.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "kgsl_sync.h"
+
+struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline,
+ unsigned int timestamp)
+{
+ struct sync_pt *pt;
+ pt = sync_pt_create(timeline, (int) sizeof(struct kgsl_sync_pt));
+ if (pt) {
+ struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+ kpt->timestamp = timestamp;
+ }
+ return pt;
+}
+
+/*
+ * This should only be called on sync_pts which have been created but
+ * not added to a fence.
+ */
+void kgsl_sync_pt_destroy(struct sync_pt *pt)
+{
+ sync_pt_free(pt);
+}
+
+static struct sync_pt *kgsl_sync_pt_dup(struct sync_pt *pt)
+{
+ struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+ return kgsl_sync_pt_create(pt->parent, kpt->timestamp);
+}
+
+static int kgsl_sync_pt_has_signaled(struct sync_pt *pt)
+{
+ struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+ struct kgsl_sync_timeline *ktimeline =
+ (struct kgsl_sync_timeline *) pt->parent;
+ unsigned int ts = kpt->timestamp;
+ unsigned int last_ts = ktimeline->last_timestamp;
+ if (timestamp_cmp(last_ts, ts) >= 0) {
+ /* signaled */
+ return 1;
+ }
+ return 0;
+}
+
+static int kgsl_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+ struct kgsl_sync_pt *kpt_a = (struct kgsl_sync_pt *) a;
+ struct kgsl_sync_pt *kpt_b = (struct kgsl_sync_pt *) b;
+ unsigned int ts_a = kpt_a->timestamp;
+ unsigned int ts_b = kpt_b->timestamp;
+ return timestamp_cmp(ts_a, ts_b);
+}
+
+struct kgsl_fence_event_priv {
+ struct kgsl_context *context;
+ unsigned int timestamp;
+};
+
+/**
+ * kgsl_fence_event_cb - Event callback for a fence timestamp event
+ * @device - The KGSL device that expired the timestamp
+ * @priv - private data for the event
+ * @context_id - the context id that goes with the timestamp
+ * @timestamp - the timestamp that triggered the event
+ *
+ * Signal a fence following the expiration of a timestamp
+ */
+
+static inline void kgsl_fence_event_cb(struct kgsl_device *device,
+ void *priv, u32 context_id, u32 timestamp)
+{
+ struct kgsl_fence_event_priv *ev = priv;
+ kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp);
+ kgsl_context_put(ev->context);
+ kfree(ev);
+}
+
+/**
+ * kgsl_add_fence_event - Create a new fence event
+ * @device - KGSL device to create the event on
+ * @timestamp - Timestamp to trigger the event
+ * @data - Return fence fd stored in struct kgsl_timestamp_event_fence
+ * @len - length of the fence event
+ * @owner - driver instance that owns this event
+ * @returns 0 on success or error code on error
+ *
+ * Create a fence and register an event to signal the fence when
+ * the timestamp expires
+ */
+
+int kgsl_add_fence_event(struct kgsl_device *device,
+ u32 context_id, u32 timestamp, void __user *data, int len,
+ struct kgsl_device_private *owner)
+{
+ struct kgsl_fence_event_priv *event;
+ struct kgsl_timestamp_event_fence priv;
+ struct kgsl_context *context;
+ struct sync_pt *pt;
+ struct sync_fence *fence = NULL;
+ int ret = -EINVAL;
+
+ if (len != sizeof(priv))
+ return -EINVAL;
+
+ context = kgsl_find_context(owner, context_id);
+ if (context == NULL)
+ return -EINVAL;
+
+ event = kzalloc(sizeof(*event), GFP_KERNEL);
+ if (event == NULL)
+ return -ENOMEM;
+ event->context = context;
+ event->timestamp = timestamp;
+ kgsl_context_get(context);
+
+ pt = kgsl_sync_pt_create(context->timeline, timestamp);
+ if (pt == NULL) {
+ KGSL_DRV_ERR(device, "kgsl_sync_pt_create failed\n");
+ ret = -ENOMEM;
+ goto fail_pt;
+ }
+
+ fence = sync_fence_create("kgsl-fence", pt);
+ if (fence == NULL) {
+ /* only destroy pt when not added to fence */
+ kgsl_sync_pt_destroy(pt);
+ KGSL_DRV_ERR(device, "sync_fence_create failed\n");
+ ret = -ENOMEM;
+ goto fail_fence;
+ }
+
+ priv.fence_fd = get_unused_fd_flags(0);
+ if (priv.fence_fd < 0) {
+ KGSL_DRV_ERR(device, "invalid fence fd\n");
+ ret = -EINVAL;
+ goto fail_fd;
+ }
+ sync_fence_install(fence, priv.fence_fd);
+
+ if (copy_to_user(data, &priv, sizeof(priv))) {
+ ret = -EFAULT;
+ goto fail_copy_fd;
+ }
+
+ ret = kgsl_add_event(device, context_id, timestamp,
+ kgsl_fence_event_cb, event, owner);
+ if (ret)
+ goto fail_event;
+
+ return 0;
+
+fail_event:
+fail_copy_fd:
+ /* clean up sync_fence_install */
+ sync_fence_put(fence);
+ put_unused_fd(priv.fence_fd);
+fail_fd:
+ /* clean up sync_fence_create */
+ sync_fence_put(fence);
+fail_fence:
+fail_pt:
+ kgsl_context_put(context);
+ kfree(event);
+ return ret;
+}
+
+static const struct sync_timeline_ops kgsl_sync_timeline_ops = {
+ .dup = kgsl_sync_pt_dup,
+ .has_signaled = kgsl_sync_pt_has_signaled,
+ .compare = kgsl_sync_pt_compare,
+};
+
+int kgsl_sync_timeline_create(struct kgsl_context *context)
+{
+ struct kgsl_sync_timeline *ktimeline;
+
+ context->timeline = sync_timeline_create(&kgsl_sync_timeline_ops,
+ (int) sizeof(struct kgsl_sync_timeline), "kgsl-timeline");
+ if (context->timeline == NULL)
+ return -EINVAL;
+
+ ktimeline = (struct kgsl_sync_timeline *) context->timeline;
+ ktimeline->last_timestamp = 0;
+
+ return 0;
+}
+
+void kgsl_sync_timeline_signal(struct sync_timeline *timeline,
+ unsigned int timestamp)
+{
+ struct kgsl_sync_timeline *ktimeline =
+ (struct kgsl_sync_timeline *) timeline;
+
+ if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0)
+ ktimeline->last_timestamp = timestamp;
+ sync_timeline_signal(timeline);
+}
+
+void kgsl_sync_timeline_destroy(struct kgsl_context *context)
+{
+ sync_timeline_destroy(context->timeline);
+}
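For reference, kgsl_sync_pt_has_signaled() and kgsl_sync_timeline_signal() above rely on timestamp_cmp() from kgsl.h for wraparound-safe ordering; a rough sketch of that scheme follows, with the half-range window being an assumption rather than the driver's exact constant:

/* Illustrative wraparound-safe compare: >0 if a is newer, 0 if equal, <0 if older */
static inline int example_timestamp_cmp(unsigned int a, unsigned int b)
{
	unsigned int delta = a - b;	/* relies on unsigned wraparound */

	if (delta == 0)
		return 0;

	/* differences smaller than half the counter range mean "a is newer" */
	return (delta < 0x80000000u) ? 1 : -1;
}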
diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h
new file mode 100644
index 0000000..06b3ad0
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sync.h
@@ -0,0 +1,75 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_SYNC_H
+#define __KGSL_SYNC_H
+
+#include <linux/sync.h>
+#include "kgsl_device.h"
+
+struct kgsl_sync_timeline {
+ struct sync_timeline timeline;
+ unsigned int last_timestamp;
+};
+
+struct kgsl_sync_pt {
+ struct sync_pt pt;
+ unsigned int timestamp;
+};
+
+#if defined(CONFIG_SYNC)
+struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline,
+ unsigned int timestamp);
+void kgsl_sync_pt_destroy(struct sync_pt *pt);
+int kgsl_add_fence_event(struct kgsl_device *device,
+ u32 context_id, u32 timestamp, void __user *data, int len,
+ struct kgsl_device_private *owner);
+int kgsl_sync_timeline_create(struct kgsl_context *context);
+void kgsl_sync_timeline_signal(struct sync_timeline *timeline,
+ unsigned int timestamp);
+void kgsl_sync_timeline_destroy(struct kgsl_context *context);
+#else
+static inline struct sync_pt
+*kgsl_sync_pt_create(struct sync_timeline *timeline, unsigned int timestamp)
+{
+ return NULL;
+}
+
+static inline void kgsl_sync_pt_destroy(struct sync_pt *pt)
+{
+}
+
+static inline int kgsl_add_fence_event(struct kgsl_device *device,
+ u32 context_id, u32 timestamp, void __user *data, int len,
+ struct kgsl_device_private *owner)
+{
+ return -EINVAL;
+}
+
+static inline int kgsl_sync_timeline_create(struct kgsl_context *context)
+{
+ context->timeline = NULL;
+ return 0;
+}
+
+static inline void
+kgsl_sync_timeline_signal(struct sync_timeline *timeline,
+ unsigned int timestamp)
+{
+}
+
+static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context)
+{
+}
+#endif
+
+#endif /* __KGSL_SYNC_H */
diff --git a/drivers/gpu/msm/kgsl_trace.c b/drivers/gpu/msm/kgsl_trace.c
index 8bdf996..e432729 100644
--- a/drivers/gpu/msm/kgsl_trace.c
+++ b/drivers/gpu/msm/kgsl_trace.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -14,5 +14,6 @@
#include "kgsl.h"
#include "kgsl_device.h"
+/* Instantiate tracepoints */
#define CREATE_TRACE_POINTS
#include "kgsl_trace.h"
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index 0df8409..c54445c 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -28,6 +28,9 @@
struct kgsl_ringbuffer_issueibcmds;
struct kgsl_device_waittimestamp;
+/*
+ * Tracepoint for kgsl issue ib commands
+ */
TRACE_EVENT(kgsl_issueibcmds,
TP_PROTO(struct kgsl_device *device,
@@ -75,6 +78,9 @@
)
);
+/*
+ * Tracepoint for kgsl readtimestamp
+ */
TRACE_EVENT(kgsl_readtimestamp,
TP_PROTO(struct kgsl_device *device,
@@ -107,6 +113,9 @@
)
);
+/*
+ * Tracepoint for kgsl waittimestamp entry
+ */
TRACE_EVENT(kgsl_waittimestamp_entry,
TP_PROTO(struct kgsl_device *device,
@@ -143,6 +152,9 @@
)
);
+/*
+ * Tracepoint for kgsl waittimestamp exit
+ */
TRACE_EVENT(kgsl_waittimestamp_exit,
TP_PROTO(struct kgsl_device *device, unsigned int curr_ts,
@@ -212,47 +224,6 @@
TP_ARGS(device, on)
);
-#ifdef CONFIG_MSM_KGSL_GPU_USAGE_SYSTRACE
-TRACE_EVENT(kgsl_usage,
-
- TP_PROTO(struct kgsl_device *device, int on, int pid, s64 total_time, s64 busy_time,
- unsigned int pwrlevel, unsigned int freq),
-
- TP_ARGS(device, on, pid, total_time, busy_time, pwrlevel, freq),
-
- TP_STRUCT__entry(
- __string(device_name, device->name)
- __field(int, on)
- __field(int, pid)
- __field(s64, total_time)
- __field(s64, busy_time)
- __field(unsigned int, pwrlevel)
- __field(unsigned int, freq)
- ),
-
- TP_fast_assign(
- __assign_str(device_name, device->name);
- __entry->on = on;
- __entry->pid = pid;
- __entry->total_time =total_time;
- __entry->busy_time = busy_time;
- __entry->pwrlevel = pwrlevel;
- __entry->freq = freq;
- ),
-
- TP_printk(
- "d_name=%s %s pid=%d total=%lld busy=%lld pwrlevel=%d freq=%d",
- __get_str(device_name),
- __entry->on ? "on" : "off",
- __entry->pid,
- __entry->total_time,
- __entry->busy_time,
- __entry->pwrlevel,
- __entry->freq
- )
-);
-#endif
-
TRACE_EVENT(kgsl_pwrlevel,
TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel,
@@ -280,6 +251,29 @@
)
);
+TRACE_EVENT(kgsl_mpdcvs,
+
+ TP_PROTO(struct kgsl_device *device, unsigned int state),
+
+ TP_ARGS(device, state),
+
+ TP_STRUCT__entry(
+ __string(device_name, device->name)
+ __field(unsigned int, state)
+ ),
+
+ TP_fast_assign(
+ __assign_str(device_name, device->name);
+ __entry->state = state;
+ ),
+
+ TP_printk(
+ "d_name=%s %s",
+ __get_str(device_name),
+ __entry->state ? "BUSY" : "IDLE"
+ )
+);
+
DECLARE_EVENT_CLASS(kgsl_pwrstate_template,
TP_PROTO(struct kgsl_device *device, unsigned int state),
@@ -321,16 +315,22 @@
TP_STRUCT__entry(
__field(unsigned int, gpuaddr)
__field(unsigned int, size)
+ __field(unsigned int, tgid)
+ __array(char, usage, 16)
),
TP_fast_assign(
__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
__entry->size = mem_entry->memdesc.size;
+ __entry->tgid = mem_entry->priv->pid;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
),
TP_printk(
- "gpuaddr=0x%08x size=%d",
- __entry->gpuaddr, __entry->size
+ "gpuaddr=0x%08x size=%d tgid=%d usage=%s",
+ __entry->gpuaddr, __entry->size, __entry->tgid,
+ __entry->usage
)
);
@@ -345,6 +345,8 @@
__field(unsigned int, size)
__field(int, fd)
__field(int, type)
+ __field(unsigned int, tgid)
+ __array(char, usage, 16)
),
TP_fast_assign(
@@ -352,12 +354,16 @@
__entry->size = mem_entry->memdesc.size;
__entry->fd = fd;
__entry->type = mem_entry->memtype;
+ __entry->tgid = mem_entry->priv->pid;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
),
TP_printk(
- "gpuaddr=0x%08x size=%d type=%d fd=%d",
+ "gpuaddr=0x%08x size=%d type=%d fd=%d tgid=%d usage %s",
__entry->gpuaddr, __entry->size,
- __entry->type, __entry->fd
+ __entry->type, __entry->fd, __entry->tgid,
+ __entry->usage
)
);
@@ -372,17 +378,23 @@
__field(unsigned int, size)
__field(int, type)
__field(int, fd)
+ __field(unsigned int, tgid)
+ __array(char, usage, 16)
),
TP_fast_assign(
__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
__entry->size = mem_entry->memdesc.size;
__entry->type = mem_entry->memtype;
+ __entry->tgid = mem_entry->priv->pid;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
),
TP_printk(
- "gpuaddr=0x%08x size=%d type=%d",
- __entry->gpuaddr, __entry->size, __entry->type
+ "gpuaddr=0x%08x size=%d type=%d tgid=%d usage=%s",
+ __entry->gpuaddr, __entry->size, __entry->type,
+ __entry->tgid, __entry->usage
)
);
@@ -398,6 +410,7 @@
__field(unsigned int, gpuaddr)
__field(unsigned int, size)
__field(int, type)
+ __array(char, usage, 16)
__field(unsigned int, drawctxt_id)
__field(unsigned int, curr_ts)
__field(unsigned int, free_ts)
@@ -407,6 +420,8 @@
__assign_str(device_name, device->name);
__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
__entry->size = mem_entry->memdesc.size;
+ kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+ mem_entry->memdesc.flags);
__entry->drawctxt_id = id;
__entry->type = mem_entry->memtype;
__entry->curr_ts = curr_ts;
@@ -414,12 +429,13 @@
),
TP_printk(
- "d_name=%s gpuaddr=0x%08x size=%d type=%d ctx=%u"
+ "d_name=%s gpuaddr=0x%08x size=%d type=%d usage=%s ctx=%u"
" curr_ts=0x%x free_ts=0x%x",
__get_str(device_name),
__entry->gpuaddr,
__entry->size,
__entry->type,
+ __entry->usage,
__entry->drawctxt_id,
__entry->curr_ts,
__entry->free_ts
@@ -513,12 +529,48 @@
),
TP_printk(
- "d_name=%s page=0x%08x pt=%d op=%s\n",
+ "d_name=%s page=0x%08x pt=%d op=%s",
__get_str(device_name), __entry->page, __entry->pt,
__get_str(op)
)
);
-#endif
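+/*
+ * Tracepoint for kgsl timestamp event registration
+ */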
+TRACE_EVENT(kgsl_register_event,
+ TP_PROTO(unsigned int id, unsigned int timestamp),
+ TP_ARGS(id, timestamp),
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(unsigned int, timestamp)
+ ),
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->timestamp = timestamp;
+ ),
+ TP_printk(
+ "ctx=%d ts=%d",
+ __entry->id, __entry->timestamp)
+);
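+
+/*
+ * Tracepoint for kgsl timestamp event firing
+ */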
+TRACE_EVENT(kgsl_fire_event,
+ TP_PROTO(unsigned int id, unsigned int ts,
+ unsigned int age),
+ TP_ARGS(id, ts, age),
+ TP_STRUCT__entry(
+ __field(unsigned int, id)
+ __field(unsigned int, ts)
+ __field(unsigned int, age)
+ ),
+ TP_fast_assign(
+ __entry->id = id;
+ __entry->ts = ts;
+ __entry->age = age;
+ ),
+ TP_printk(
+ "ctx=%d ts=%d age=%u",
+ __entry->id, __entry->ts, __entry->age)
+);
+
+#endif /* _KGSL_TRACE_H */
+
+/* This part must be outside protection */
#include <trace/define_trace.h>
diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c
index fd03d5e..484630d 100644
--- a/drivers/gpu/msm/z180.c
+++ b/drivers/gpu/msm/z180.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -25,9 +25,6 @@
#define DRIVER_VERSION_MAJOR 3
#define DRIVER_VERSION_MINOR 1
-#define Z180_DEVICE(device) \
- KGSL_CONTAINER_OF(device, struct z180_device, dev)
-
#define GSL_VGC_INT_MASK \
(REG_VGC_IRQSTATUS__MH_MASK | \
REG_VGC_IRQSTATUS__G2D_MASK | \
@@ -41,16 +38,12 @@
#define VGV3_CONTROL_MARKADD_FSHIFT 0
#define VGV3_CONTROL_MARKADD_FMASK 0xfff
-#define Z180_PACKET_SIZE 15
#define Z180_MARKER_SIZE 10
#define Z180_CALL_CMD 0x1000
#define Z180_MARKER_CMD 0x8000
#define Z180_STREAM_END_CMD 0x9000
#define Z180_STREAM_PACKET 0x7C000176
#define Z180_STREAM_PACKET_CALL 0x7C000275
-#define Z180_PACKET_COUNT 8
-#define Z180_RB_SIZE (Z180_PACKET_SIZE*Z180_PACKET_COUNT \
- *sizeof(uint32_t))
#define NUMTEXUNITS 4
#define TEXUNITREGCOUNT 25
@@ -69,6 +62,7 @@
#define Z180_INVALID_CONTEXT UINT_MAX
+/* z180 MH arbiter config */
#define Z180_CFG_MHARB \
(0x10 \
| (0 << MH_ARBITER_CONFIG__SAME_PAGE_GRANULARITY__SHIFT) \
@@ -140,6 +134,9 @@
.mharb = Z180_CFG_MHARB,
.mh_intf_cfg1 = 0x00032f07,
.mh_intf_cfg2 = 0x004b274f,
+ /* turn off memory protection unit by setting
+ acceptable physical address range to include
+ all pages. */
.mpu_base = 0x00000000,
.mpu_range = 0xFFFFF000,
},
@@ -164,6 +161,9 @@
.mharb = Z180_CFG_MHARB,
.mh_intf_cfg1 = 0x00032f07,
.mh_intf_cfg2 = 0x004b274f,
+ /* turn off memory protection unit by setting
+ acceptable physical address range to include
+ all pages. */
.mpu_base = 0x00000000,
.mpu_range = 0xFFFFF000,
},
@@ -406,6 +406,10 @@
}
cmd = ibdesc[0].gpuaddr;
sizedwords = ibdesc[0].sizedwords;
+ /*
+ * Get a kernel mapping to the IB for monkey patching.
+ * See the end of this function.
+ */
entry = kgsl_sharedmem_find_region(dev_priv->process_priv, cmd,
sizedwords);
if (entry == NULL) {
@@ -414,6 +418,11 @@
result = -EINVAL;
goto error;
}
+ /*
+ * This only creates a kernel mapping if one does not already exist;
+ * otherwise the existing mapping is reused. The 2d userspace reuses IBs,
+ * so we likely won't create too many mappings.
+ */
if (kgsl_gpuaddr_to_vaddr(&entry->memdesc, cmd) == NULL) {
KGSL_DRV_ERR(device,
"Cannot make kernel mapping for gpuaddr 0x%x\n",
@@ -424,7 +433,7 @@
KGSL_CMD_INFO(device, "ctxt %d ibaddr 0x%08x sizedwords %d\n",
context->id, cmd, sizedwords);
-
+ /* context switch */
if ((context->id != (int)z180_dev->ringbuffer.prevctx) ||
(ctrl & KGSL_CONTEXT_CTX_SWITCH)) {
KGSL_CMD_INFO(device, "context switch %d -> %d\n",
@@ -458,10 +467,10 @@
addcmd(&z180_dev->ringbuffer, old_timestamp, cmd + ofs, cnt);
kgsl_pwrscale_busy(device);
-
+ /* Make sure the next ringbuffer entry has a marker */
addmarker(&z180_dev->ringbuffer, z180_dev->current_timestamp);
-
+ /* monkey patch the IB so that it jumps back to the ringbuffer */
kgsl_sharedmem_writel(&entry->memdesc,
((sizedwords + 1) * sizeof(unsigned int)),
rb_gpuaddr(z180_dev, z180_dev->current_timestamp));
@@ -469,7 +478,7 @@
((sizedwords + 2) * sizeof(unsigned int)),
nextcnt);
-
+ /* sync memory before activating the hardware for the new command */
mb();
cmd = (int)(((2) & VGV3_CONTROL_MARKADD_FMASK)
@@ -550,7 +559,7 @@
kgsl_pwrctrl_enable(device);
-
+ /* Set interrupts to 0 to ensure a good state */
z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 0x0);
kgsl_mh_start(device);
@@ -581,7 +590,7 @@
kgsl_mmu_stop(&device->mmu);
-
+ /* Disable the clocks before the power rail. */
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
kgsl_pwrctrl_disable(device);
@@ -658,6 +667,9 @@
return 0;
}
+/* Not all Z180 registers are directly accessible.
+ * The _z180_(read|write)_simple functions below handle the ones that are.
+ */
static void _z180_regread_simple(struct kgsl_device *device,
unsigned int offsetwords,
unsigned int *value)
@@ -668,6 +680,8 @@
reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
+ /* ensure this read finishes before the next one,
+ * i.e. act like normal readl() */
*value = __raw_readl(reg);
rmb();
@@ -683,11 +697,18 @@
reg = (unsigned int *)(device->reg_virt + (offsetwords << 2));
kgsl_cffdump_regwrite(device->id, offsetwords << 2, value);
+ /* ensure previous writes post before this one,
+ * i.e. act like normal writel() */
wmb();
__raw_writel(value, reg);
}
+/* The MH registers must be accessed through a 2 step write, (read|write)
+ * process. These registers may be accessed from interrupt context during
+ * the handling of MH or MMU error interrupts. Therefore a spin lock is used
+ * to ensure that the 2 step sequence is not interrupted.
+ */
static void _z180_regread_mmu(struct kgsl_device *device,
unsigned int offsetwords,
unsigned int *value)
@@ -723,6 +744,9 @@
spin_unlock_irqrestore(&z180_dev->cmdwin_lock, flags);
}
+/* The rest of the code doesn't want to think about whether it is writing MMU
+ * registers or normal registers, so handle that distinction here.
+ */
static void z180_regread(struct kgsl_device *device,
unsigned int offsetwords,
unsigned int *value)
@@ -776,7 +800,7 @@
{
struct z180_device *z180_dev = Z180_DEVICE(device);
(void)context;
-
+ /* get current EOP timestamp */
return z180_dev->timestamp;
}
@@ -787,7 +811,7 @@
{
int status = -EINVAL;
-
+ /* Don't wait forever, set a max (10 sec) value for now */
if (msecs == -1)
msecs = 10 * MSEC_PER_SEC;
@@ -816,6 +840,7 @@
else if (timeout == 0) {
status = -ETIMEDOUT;
kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
+ kgsl_postmortem_dump(device, 0);
} else
status = timeout;
@@ -858,7 +883,7 @@
static void z180_irqctrl(struct kgsl_device *device, int state)
{
-
+ /* Control interrupts for Z180 and the Z180 MMU */
if (state) {
z180_regwrite(device, (ADDR_VGC_IRQENABLE >> 2), 3);
@@ -875,12 +900,16 @@
if (chipid != NULL)
*chipid = 0;
+ /* Standard KGSL gpuid format:
+ * top word is 0x0002 for 2D or 0x0003 for 3D
+ * bottom word is a core specific identifier
+ */
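+ /* e.g. this evaluates to 0x000200b4 for the Z180 core */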
return (0x0002 << 16) | 180;
}
static const struct kgsl_functable z180_functable = {
-
+ /* Mandatory functions */
.regread = z180_regread,
.regwrite = z180_regwrite,
.idle = z180_idle,
@@ -898,10 +927,11 @@
.irqctrl = z180_irqctrl,
.gpuid = z180_gpuid,
.irq_handler = z180_irq_handler,
-
+ /* Optional functions */
.drawctxt_create = NULL,
.drawctxt_destroy = z180_drawctxt_destroy,
.ioctl = NULL,
+ .postmortem_dump = z180_dump,
};
static struct platform_device_id z180_id_table[] = {
diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h
index 7f4ab7f..268aac3 100644
--- a/drivers/gpu/msm/z180.h
+++ b/drivers/gpu/msm/z180.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -19,8 +19,16 @@
#define DEVICE_2D0_NAME "kgsl-2d0"
#define DEVICE_2D1_NAME "kgsl-2d1"
+#define Z180_PACKET_SIZE 15
+#define Z180_PACKET_COUNT 8
+#define Z180_RB_SIZE (Z180_PACKET_SIZE*Z180_PACKET_COUNT \
+ *sizeof(uint32_t))
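+/* Z180_DEVICE() recovers the wrapping z180_device from its embedded kgsl_device */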
+#define Z180_DEVICE(device) \
+ KGSL_CONTAINER_OF(device, struct z180_device, dev)
+
#define Z180_DEFAULT_PWRSCALE_POLICY NULL
+/* Wait a maximum of 10 seconds when trying to idle the core */
#define Z180_IDLE_TIMEOUT (10 * 1000)
struct z180_ringbuffer {
@@ -29,11 +37,13 @@
};
struct z180_device {
- struct kgsl_device dev;
+ struct kgsl_device dev; /* Must be first field in this struct */
int current_timestamp;
int timestamp;
struct z180_ringbuffer ringbuffer;
spinlock_t cmdwin_lock;
};
-#endif
+int z180_dump(struct kgsl_device *, int);
+
+#endif /* __Z180_H */
diff --git a/drivers/gpu/msm/z180_postmortem.c b/drivers/gpu/msm/z180_postmortem.c
new file mode 100644
index 0000000..c1e5f07
--- /dev/null
+++ b/drivers/gpu/msm/z180_postmortem.c
@@ -0,0 +1,230 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "z180.h"
+#include "z180_reg.h"
+
+#define Z180_STREAM_PACKET_CALL 0x7C000275
+
+/* Postmortem Dump formatted Output parameters */
+
+/* Number of Words per dump data line */
+#define WORDS_PER_LINE 8
+
+/* Number of spaces per dump data line */
+#define NUM_SPACES (WORDS_PER_LINE - 1)
+
+/*
+ * Output dump data is formatted as a string, so size the per-line
+ * buffer by the number of characters each line needs
+ */
+#define CHARS_PER_LINE \
+ ((WORDS_PER_LINE * (2*sizeof(unsigned int))) + NUM_SPACES + 1)
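+/* e.g. 8 words * 8 hex chars + 7 spaces + 1 NUL terminator = 72 chars */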
+
+/* Z180 registers (byte offsets) to be dumped */
+static const unsigned int regs_to_dump[] = {
+ ADDR_VGC_VERSION,
+ ADDR_VGC_SYSSTATUS,
+ ADDR_VGC_IRQSTATUS,
+ ADDR_VGC_IRQENABLE,
+ ADDR_VGC_IRQ_ACTIVE_CNT,
+ ADDR_VGC_CLOCKEN,
+ ADDR_VGC_MH_DATA_ADDR,
+ ADDR_VGC_GPR0,
+ ADDR_VGC_GPR1,
+ ADDR_VGC_BUSYCNT,
+ ADDR_VGC_FIFOFREE,
+};
+
+/**
+ * z180_dump_regs - Dumps all of the Z180 external registers, printing the
+ * word offset of each register on its output line.
+ * @device: kgsl_device pointer to the Z180 core
+ */
+static void z180_dump_regs(struct kgsl_device *device)
+{
+ unsigned int i;
+ unsigned int reg_val;
+
+ KGSL_LOG_DUMP(device, "Z180 Register Dump\n");
+ for (i = 0; i < ARRAY_SIZE(regs_to_dump); i++) {
+ kgsl_regread(device,
+ regs_to_dump[i]/sizeof(unsigned int), &reg_val);
+ KGSL_LOG_DUMP(device, "REG: %04X: %08X\n",
+ regs_to_dump[i]/sizeof(unsigned int), reg_val);
+ }
+}
+
+/**
+ * z180_dump_ringbuffer - Dumps the Z180 core's ringbuffer contents
+ * @device: kgsl_device pointer to the z180 core
+ */
+static void z180_dump_ringbuffer(struct kgsl_device *device)
+{
+ unsigned int rb_size;
+ unsigned int *rb_hostptr;
+ unsigned int rb_words;
+ unsigned int rb_gpuaddr;
+ struct z180_device *z180_dev = Z180_DEVICE(device);
+ unsigned int i;
+ char linebuf[CHARS_PER_LINE];
+
+ KGSL_LOG_DUMP(device, "Z180 ringbuffer dump\n");
+
+ rb_hostptr = (unsigned int *) z180_dev->ringbuffer.cmdbufdesc.hostptr;
+
+ rb_size = Z180_RB_SIZE;
+ rb_gpuaddr = z180_dev->ringbuffer.cmdbufdesc.gpuaddr;
+
+ rb_words = rb_size/sizeof(unsigned int);
+
+ KGSL_LOG_DUMP(device, "ringbuffer size: %u\n", rb_size);
+
+ KGSL_LOG_DUMP(device, "rb_words: %d\n", rb_words);
+
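+ /* print WORDS_PER_LINE words per line, grouped as 32-bit hex values */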
+ for (i = 0; i < rb_words; i += WORDS_PER_LINE) {
+ hex_dump_to_buffer(rb_hostptr+i,
+ rb_size - i*sizeof(unsigned int),
+ WORDS_PER_LINE*sizeof(unsigned int),
+ sizeof(unsigned int), linebuf,
+ sizeof(linebuf), false);
+ KGSL_LOG_DUMP(device, "RB: %04X: %s\n",
+ rb_gpuaddr + i*sizeof(unsigned int), linebuf);
+ }
+}
+
+
+static void z180_dump_ib(struct kgsl_device *device)
+{
+ unsigned int rb_size;
+ unsigned int *rb_hostptr;
+ unsigned int rb_words;
+ unsigned int rb_gpuaddr;
+ unsigned int ib_gpuptr = 0;
+ unsigned int ib_size = 0;
+ void *ib_hostptr = NULL;
+ int rb_slot_num = -1;
+ struct z180_device *z180_dev = Z180_DEVICE(device);
+ struct kgsl_mem_entry *entry = NULL;
+ unsigned int pt_base;
+ unsigned int i;
+ unsigned int j;
+ char linebuf[CHARS_PER_LINE];
+ unsigned int current_ib_slot;
+ unsigned int len;
+ unsigned int rowsize;
+ KGSL_LOG_DUMP(device, "Z180 IB dump\n");
+
+ rb_hostptr = (unsigned int *) z180_dev->ringbuffer.cmdbufdesc.hostptr;
+
+ rb_size = Z180_RB_SIZE;
+ rb_gpuaddr = z180_dev->ringbuffer.cmdbufdesc.gpuaddr;
+
+ rb_words = rb_size/sizeof(unsigned int);
+
+ KGSL_LOG_DUMP(device, "Ringbuffer size (bytes): %u\n", rb_size);
+
+ KGSL_LOG_DUMP(device, "rb_words: %d\n", rb_words);
+
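+ /* use the active pagetable to look up IB mem entries by GPU address */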
+ pt_base = kgsl_mmu_get_current_ptbase(&device->mmu);
+
+ /* Dump the current IB */
+ for (i = 0; i < rb_words; i++) {
+ if (rb_hostptr[i] == Z180_STREAM_PACKET_CALL) {
+
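+ /* The word after each CALL packet holds the IB's GPU address;
+ * only the slot matching current_timestamp is dumped.
+ */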
+ rb_slot_num++;
+ current_ib_slot =
+ z180_dev->current_timestamp % Z180_PACKET_COUNT;
+ if (rb_slot_num != current_ib_slot)
+ continue;
+
+ ib_gpuptr = rb_hostptr[i+1];
+
+ entry = kgsl_get_mem_entry(device, pt_base, ib_gpuptr,
+ 1);
+
+ if (entry == NULL) {
+ KGSL_LOG_DUMP(device,
+ "IB mem entry not found for ringbuffer slot#: %d\n",
+ rb_slot_num);
+ continue;
+ }
+
+ ib_hostptr = kgsl_memdesc_map(&entry->memdesc);
+
+ if (ib_hostptr == NULL) {
+ KGSL_LOG_DUMP(device,
+ "Could not map IB to kernel memory, Ringbuffer Slot: %d\n",
+ rb_slot_num);
+ continue;
+ }
+
+ ib_size = entry->memdesc.size;
+ KGSL_LOG_DUMP(device,
+ "IB size: %dbytes, IB size in words: %d\n",
+ ib_size,
+ ib_size/sizeof(unsigned int));
+
+ for (j = 0; j < ib_size/sizeof(unsigned int);
+ j += WORDS_PER_LINE) {
+ len = ib_size - j*sizeof(unsigned int);
+ rowsize = WORDS_PER_LINE*sizeof(unsigned int);
+ hex_dump_to_buffer((unsigned int *)ib_hostptr + j,
+ len, rowsize,
+ sizeof(unsigned int), linebuf,
+ sizeof(linebuf), false);
+ KGSL_LOG_DUMP(device, "IB%d: %04X: %s\n",
+ rb_slot_num,
+ (rb_gpuaddr +
+ j*sizeof(unsigned int)),
+ linebuf);
+ }
+ KGSL_LOG_DUMP(device, "IB Dump Finished\n");
+ }
+ }
+}
+
+
+/**
+ * z180_dump - Dumps the Z180 ringbuffer and registers (and IBs if asked for)
+ * for postmortem analysis.
+ * @device: kgsl_device pointer to the Z180 core
+ * @manual: nonzero when the dump was requested manually; automatic dumps
+ * end in a BUG to capture a stack trace
+ */
+int z180_dump(struct kgsl_device *device, int manual)
+{
+ struct z180_device *z180_dev = Z180_DEVICE(device);
+
+ mb();
+
+ KGSL_LOG_DUMP(device, "Retired Timestamp: %d\n", z180_dev->timestamp);
+ KGSL_LOG_DUMP(device,
+ "Current Timestamp: %d\n", z180_dev->current_timestamp);
+
+ /* Dump ringbuffer */
+ z180_dump_ringbuffer(device);
+
+ /* Dump registers */
+ z180_dump_regs(device);
+
+ /* Dump IBs, if asked for */
+ if (device->pm_ib_enabled)
+ z180_dump_ib(device);
+
+ /* Get the stack trace if the dump was automatic */
+ if (!manual)
+ BUG_ON(1);
+
+ return 0;
+}
+
diff --git a/drivers/gpu/msm/z180_reg.h b/drivers/gpu/msm/z180_reg.h
index 382d0c5..81f1fdc 100644
--- a/drivers/gpu/msm/z180_reg.h
+++ b/drivers/gpu/msm/z180_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -45,5 +45,12 @@
#define ADDR_VGV3_NEXTADDR 0x0075
#define ADDR_VGV3_NEXTCMD 0x0076
#define ADDR_VGV3_WRITEADDR 0x0072
+#define ADDR_VGC_VERSION 0x400
+#define ADDR_VGC_SYSSTATUS 0x410
+#define ADDR_VGC_CLOCKEN 0x508
+#define ADDR_VGC_GPR0 0x520
+#define ADDR_VGC_GPR1 0x528
+#define ADDR_VGC_BUSYCNT 0x530
+#define ADDR_VGC_FIFOFREE 0x7c0
-#endif
+#endif /* __Z180_REG_H */
diff --git a/drivers/gpu/msm/z180_trace.c b/drivers/gpu/msm/z180_trace.c
index c5349db..9d971ee 100644
--- a/drivers/gpu/msm/z180_trace.c
+++ b/drivers/gpu/msm/z180_trace.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -15,5 +15,6 @@
#include "z180.h"
#include "z180_reg.h"
+/* Instantiate tracepoints */
#define CREATE_TRACE_POINTS
#include "z180_trace.h"
diff --git a/drivers/gpu/msm/z180_trace.h b/drivers/gpu/msm/z180_trace.h
index 3536655..4f65b9b 100644
--- a/drivers/gpu/msm/z180_trace.h
+++ b/drivers/gpu/msm/z180_trace.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2011, The Linux Foundation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 and
@@ -25,6 +25,9 @@
struct kgsl_device;
+/*
+ * Tracepoint for z180 irq. Includes status info
+ */
TRACE_EVENT(kgsl_z180_irq_status,
TP_PROTO(struct kgsl_device *device, unsigned int status),
@@ -51,6 +54,7 @@
)
);
-#endif
+#endif /* _Z180_TRACE_H */
+/* This part must be outside protection */
#include <trace/define_trace.h>