drm/radeon: split r6xx and r7xx copy_dma functions

- r6xx actually uses a slightly different packet format,
although both formats seem to work ok.
- r7xx doesn't have the count multiple of 2 limitation.

Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 923f936..537e259 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -2646,7 +2646,7 @@
  * @num_gpu_pages: number of GPU pages to xfer
  * @fence: radeon fence object
  *
- * Copy GPU paging using the DMA engine (r6xx-r7xx).
+ * Copy GPU paging using the DMA engine (r6xx).
  * Used by the radeon ttm implementation to move pages if
  * registered as the asic copy callback.
  */
@@ -2669,8 +2669,8 @@
 	}
 
 	size_in_dw = (num_gpu_pages << RADEON_GPU_PAGE_SHIFT) / 4;
-	num_loops = DIV_ROUND_UP(size_in_dw, 0xffff);
-	r = radeon_ring_lock(rdev, ring, num_loops * 5 + 8);
+	num_loops = DIV_ROUND_UP(size_in_dw, 0xFFFE);
+	r = radeon_ring_lock(rdev, ring, num_loops * 4 + 8);
 	if (r) {
 		DRM_ERROR("radeon: moving bo (%d).\n", r);
 		radeon_semaphore_free(rdev, &sem, NULL);
@@ -2693,8 +2693,8 @@
 		radeon_ring_write(ring, DMA_PACKET(DMA_PACKET_COPY, 0, 0, cur_size_in_dw));
 		radeon_ring_write(ring, dst_offset & 0xfffffffc);
 		radeon_ring_write(ring, src_offset & 0xfffffffc);
-		radeon_ring_write(ring, upper_32_bits(dst_offset) & 0xff);
-		radeon_ring_write(ring, upper_32_bits(src_offset) & 0xff);
+		radeon_ring_write(ring, (((upper_32_bits(dst_offset) & 0xff) << 16) |
+					 (upper_32_bits(src_offset) & 0xff)));
 		src_offset += cur_size_in_dw * 4;
 		dst_offset += cur_size_in_dw * 4;
 	}