drm/radeon/kms: simplify & improve GPU reset V2
This simplify and improve GPU reset for R1XX-R6XX hw, it's
not 100% reliable here are result:
- R1XX/R2XX works bunch of time in a row, sometimes it
seems it can work indifinitly
- R3XX/R3XX the most unreliable one, sometimes you will be
able to reset few times, sometimes not even once
- R5XX more reliable than previous hw, seems to work most
of the times but once in a while it fails for no obvious
reasons (same status than previous reset just no same
happy ending)
- R6XX/R7XX are lot more reliable with this patch, still
it seems that it can fail after a bunch (reset every
2sec for 3hour bring down the GPU & computer)
This have been tested on various hw, for some odd reasons
i wasn't able to lockup RS480/RS690 (while they use to
love locking up).
Note that on R1XX-R5XX the cursor will disapear after
lockup haven't checked why, switch to console and back
to X will restore cursor.
Next step is to record the bogus command that leaded to
the lockup.
V2 Fix r6xx resume path to avoid reinitializing blit
module, use the gpu_lockup boolean to avoid entering
inifinite waiting loop on fence while reiniting the GPU
Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/rs600.c b/drivers/gpu/drm/radeon/rs600.c
index c1be20a..a16d9d7 100644
--- a/drivers/gpu/drm/radeon/rs600.c
+++ b/drivers/gpu/drm/radeon/rs600.c
@@ -147,6 +147,78 @@
}
}
+void rs600_bm_disable(struct radeon_device *rdev)
+{
+ u32 tmp;
+
+ /* disable bus mastering */
+ pci_read_config_word(rdev->pdev, 0x4, (u16*)&tmp);
+ pci_write_config_word(rdev->pdev, 0x4, tmp & 0xFFFB);
+ mdelay(1);
+}
+
+int rs600_asic_reset(struct radeon_device *rdev)
+{
+ u32 status, tmp;
+
+ struct rv515_mc_save save;
+
+ /* Stops all mc clients */
+ rv515_mc_stop(rdev, &save);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ if (!G_000E40_GUI_ACTIVE(status)) {
+ return 0;
+ }
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* stop CP */
+ WREG32(RADEON_CP_CSQ_CNTL, 0);
+ tmp = RREG32(RADEON_CP_RB_CNTL);
+ WREG32(RADEON_CP_RB_CNTL, tmp | RADEON_RB_RPTR_WR_ENA);
+ WREG32(RADEON_CP_RB_RPTR_WR, 0);
+ WREG32(RADEON_CP_RB_WPTR, 0);
+ WREG32(RADEON_CP_RB_CNTL, tmp);
+ pci_save_state(rdev->pdev);
+ /* disable bus mastering */
+ rs600_bm_disable(rdev);
+ /* reset GA+VAP */
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_VAP(1) |
+ S_0000F0_SOFT_RESET_GA(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* reset CP */
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_CP(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* reset MC */
+ WREG32(R_0000F0_RBBM_SOFT_RESET, S_0000F0_SOFT_RESET_MC(1));
+ RREG32(R_0000F0_RBBM_SOFT_RESET);
+ mdelay(500);
+ WREG32(R_0000F0_RBBM_SOFT_RESET, 0);
+ mdelay(1);
+ status = RREG32(R_000E40_RBBM_STATUS);
+ dev_info(rdev->dev, "(%s:%d) RBBM_STATUS=0x%08X\n", __func__, __LINE__, status);
+ /* restore PCI & busmastering */
+ pci_restore_state(rdev->pdev);
+ /* Check if GPU is idle */
+ if (G_000E40_GA_BUSY(status) || G_000E40_VAP_BUSY(status)) {
+ dev_err(rdev->dev, "failed to reset GPU\n");
+ rdev->gpu_lockup = true;
+ return -1;
+ }
+ rv515_mc_resume(rdev, &save);
+ dev_info(rdev->dev, "GPU reset succeed\n");
+ return 0;
+}
+
/*
* GART.
*/
@@ -454,7 +526,6 @@
void rs600_gpu_init(struct radeon_device *rdev)
{
- r100_hdp_reset(rdev);
r420_pipes_init(rdev);
/* Wait for mc idle */
if (rs600_mc_wait_for_idle(rdev))