[POWERPC] Refactor 64-bit DMA operations

This patch completely refactors DMA operations for 64-bit powerpc. 32-bit
is untouched for now.

We use the new dev_archdata structure to add the DMA operations pointer
and associated data to struct device. While at it, we also add the OF node
pointer and NUMA node. In the future, we might want to look into merging
that with pci_dn as well.
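
For reference, the resulting per-device data looks roughly like this (a
sketch only; see include/asm-powerpc/device.h for the real definition):

	struct dev_archdata {
		/* Optional pointer to an OF device node */
		struct device_node	*of_node;

		/* DMA operations and bus-specific data for this device */
		struct dma_mapping_ops	*dma_ops;
		void			*dma_data;

		/* NUMA node if applicable */
		int			numa_node;
	};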

The old vio, pci-iommu and pci-direct DMA ops are gone. They are replaced
by a set of generic iommu and direct DMA ops (not PCI-specific) that can be
used by any bus type. The top-level implementation is now inline.
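
To give an idea of the new top-level path, the inline wrappers in
include/asm-powerpc/dma-mapping.h now boil down to something like this
(a sketch only, with a single example callback shown):

	static inline struct dma_mapping_ops *get_dma_ops(struct device *dev)
	{
		/* The ops pointer now lives in the device itself */
		if (unlikely(dev == NULL || dev->archdata.dma_ops == NULL))
			return NULL;
		return dev->archdata.dma_ops;
	}

	static inline dma_addr_t dma_map_single(struct device *dev, void *ptr,
						size_t size,
						enum dma_data_direction direction)
	{
		struct dma_mapping_ops *dma_ops = get_dma_ops(dev);

		BUG_ON(dma_ops == NULL);
		return dma_ops->map_single(dev, ptr, size, direction);
	}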

Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
diff --git a/arch/powerpc/kernel/dma_64.c b/arch/powerpc/kernel/dma_64.c
index 6c168f6..4e65511 100644
--- a/arch/powerpc/kernel/dma_64.c
+++ b/arch/powerpc/kernel/dma_64.c
@@ -1,151 +1,185 @@
 /*
- * Copyright (C) 2004 IBM Corporation
+ * Copyright (C) 2006 Benjamin Herrenschmidt, IBM Corporation
  *
- * Implements the generic device dma API for ppc64. Handles
- * the pci and vio busses
+ * Provide default implementations of the DMA mapping callbacks for
+ * directly mapped busses and busses using the iommu infrastructure
  */
 
 #include <linux/device.h>
 #include <linux/dma-mapping.h>
-/* Include the busses we support */
-#include <linux/pci.h>
-#include <asm/vio.h>
-#include <asm/ibmebus.h>
-#include <asm/scatterlist.h>
 #include <asm/bug.h>
+#include <asm/iommu.h>
+#include <asm/abs_addr.h>
 
-static struct dma_mapping_ops *get_dma_ops(struct device *dev)
+/*
+ * Generic iommu implementation
+ */
+
+static inline unsigned long device_to_mask(struct device *dev)
 {
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
-		return &pci_dma_ops;
-#endif
-#ifdef CONFIG_IBMVIO
-	if (dev->bus == &vio_bus_type)
-		return &vio_dma_ops;
-#endif
-#ifdef CONFIG_IBMEBUS
-	if (dev->bus == &ibmebus_bus_type)
-		return &ibmebus_dma_ops;
-#endif
-	return NULL;
+	if (dev->dma_mask && *dev->dma_mask)
+		return *dev->dma_mask;
+	/* Assume devices without mask can take 32 bit addresses */
+	return 0xfffffffful;
 }
 
-int dma_supported(struct device *dev, u64 mask)
+
+/* Allocates a contiguous real buffer and creates mappings over it.
+ * Returns the virtual address of the buffer and sets dma_handle
+ * to the dma address (mapping) of the first page.
+ */
+static void *dma_iommu_alloc_coherent(struct device *dev, size_t size,
+				      dma_addr_t *dma_handle, gfp_t flag)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	return dma_ops->dma_supported(dev, mask);
+	return iommu_alloc_coherent(dev->archdata.dma_data, size, dma_handle,
+				    device_to_mask(dev), flag,
+				    dev->archdata.numa_node);
 }
-EXPORT_SYMBOL(dma_supported);
 
-int dma_set_mask(struct device *dev, u64 dma_mask)
+static void dma_iommu_free_coherent(struct device *dev, size_t size,
+				    void *vaddr, dma_addr_t dma_handle)
 {
-#ifdef CONFIG_PCI
-	if (dev->bus == &pci_bus_type)
-		return pci_set_dma_mask(to_pci_dev(dev), dma_mask);
-#endif
-#ifdef CONFIG_IBMVIO
-	if (dev->bus == &vio_bus_type)
-		return -EIO;
-#endif /* CONFIG_IBMVIO */
-#ifdef CONFIG_IBMEBUS
-	if (dev->bus == &ibmebus_bus_type)
-		return -EIO;
-#endif
-	BUG();
-	return 0;
+	iommu_free_coherent(dev->archdata.dma_data, size, vaddr, dma_handle);
 }
-EXPORT_SYMBOL(dma_set_mask);
 
-void *dma_alloc_coherent(struct device *dev, size_t size,
-		dma_addr_t *dma_handle, gfp_t flag)
+/* Creates TCEs for a user provided buffer.  The user buffer must be
+ * contiguous real kernel storage (not vmalloc).  The address of the buffer
+ * passed here is the kernel (virtual) address of the buffer.  The buffer
+ * need not be page aligned, the dma_addr_t returned will point to the same
+ * byte within the page as vaddr.
+ */
+static dma_addr_t dma_iommu_map_single(struct device *dev, void *vaddr,
+				       size_t size,
+				       enum dma_data_direction direction)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	return dma_ops->alloc_coherent(dev, size, dma_handle, flag);
+	return iommu_map_single(dev->archdata.dma_data, vaddr, size,
+			        device_to_mask(dev), direction);
 }
-EXPORT_SYMBOL(dma_alloc_coherent);
 
-void dma_free_coherent(struct device *dev, size_t size, void *cpu_addr,
-		dma_addr_t dma_handle)
+
+static void dma_iommu_unmap_single(struct device *dev, dma_addr_t dma_handle,
+				   size_t size,
+				   enum dma_data_direction direction)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	dma_ops->free_coherent(dev, size, cpu_addr, dma_handle);
+	iommu_unmap_single(dev->archdata.dma_data, dma_handle, size, direction);
 }
-EXPORT_SYMBOL(dma_free_coherent);
 
-dma_addr_t dma_map_single(struct device *dev, void *cpu_addr, size_t size,
-		enum dma_data_direction direction)
+
+static int dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
+			    int nelems, enum dma_data_direction direction)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	return dma_ops->map_single(dev, cpu_addr, size, direction);
+	return iommu_map_sg(dev->archdata.dma_data, sglist, nelems,
+			    device_to_mask(dev), direction);
 }
-EXPORT_SYMBOL(dma_map_single);
 
-void dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size,
-		enum dma_data_direction direction)
+static void dma_iommu_unmap_sg(struct device *dev, struct scatterlist *sglist,
+		int nelems, enum dma_data_direction direction)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	dma_ops->unmap_single(dev, dma_addr, size, direction);
+	iommu_unmap_sg(dev->archdata.dma_data, sglist, nelems, direction);
 }
-EXPORT_SYMBOL(dma_unmap_single);
 
-dma_addr_t dma_map_page(struct device *dev, struct page *page,
-		unsigned long offset, size_t size,
-		enum dma_data_direction direction)
+/* We support DMA to/from any memory page via the iommu */
+static int dma_iommu_dma_supported(struct device *dev, u64 mask)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+	struct iommu_table *tbl = dev->archdata.dma_data;
 
-	BUG_ON(!dma_ops);
-
-	return dma_ops->map_single(dev, page_address(page) + offset, size,
-			direction);
+	if (!tbl || tbl->it_offset > mask) {
+		printk(KERN_INFO
+		       "Warning: IOMMU offset too big for device mask\n");
+		if (tbl)
+			printk(KERN_INFO
+			       "mask: 0x%08lx, table offset: 0x%08lx\n",
+				mask, tbl->it_offset);
+		else
+			printk(KERN_INFO "mask: 0x%08lx, table unavailable\n",
+				mask);
+		return 0;
+	} else
+		return 1;
 }
-EXPORT_SYMBOL(dma_map_page);
 
-void dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size,
-		enum dma_data_direction direction)
+struct dma_mapping_ops dma_iommu_ops = {
+	.alloc_coherent	= dma_iommu_alloc_coherent,
+	.free_coherent	= dma_iommu_free_coherent,
+	.map_single	= dma_iommu_map_single,
+	.unmap_single	= dma_iommu_unmap_single,
+	.map_sg		= dma_iommu_map_sg,
+	.unmap_sg	= dma_iommu_unmap_sg,
+	.dma_supported	= dma_iommu_dma_supported,
+};
+EXPORT_SYMBOL(dma_iommu_ops);
+
+/*
+ * Generic direct DMA implementation
+ */
+
+static void *dma_direct_alloc_coherent(struct device *dev, size_t size,
+				       dma_addr_t *dma_handle, gfp_t flag)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
+	void *ret;
 
-	BUG_ON(!dma_ops);
-
-	dma_ops->unmap_single(dev, dma_address, size, direction);
+	/* TODO: Maybe use the numa node here too ? */
+	ret = (void *)__get_free_pages(flag, get_order(size));
+	if (ret != NULL) {
+		memset(ret, 0, size);
+		*dma_handle = virt_to_abs(ret);
+	}
+	return ret;
 }
-EXPORT_SYMBOL(dma_unmap_page);
 
-int dma_map_sg(struct device *dev, struct scatterlist *sg, int nents,
-		enum dma_data_direction direction)
+static void dma_direct_free_coherent(struct device *dev, size_t size,
+				     void *vaddr, dma_addr_t dma_handle)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	return dma_ops->map_sg(dev, sg, nents, direction);
+	free_pages((unsigned long)vaddr, get_order(size));
 }
-EXPORT_SYMBOL(dma_map_sg);
 
-void dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries,
-		enum dma_data_direction direction)
+static dma_addr_t dma_direct_map_single(struct device *dev, void *ptr,
+					size_t size,
+					enum dma_data_direction direction)
 {
-	struct dma_mapping_ops *dma_ops = get_dma_ops(dev);
-
-	BUG_ON(!dma_ops);
-
-	dma_ops->unmap_sg(dev, sg, nhwentries, direction);
+	return virt_to_abs(ptr);
 }
-EXPORT_SYMBOL(dma_unmap_sg);
+
+static void dma_direct_unmap_single(struct device *dev, dma_addr_t dma_addr,
+				    size_t size,
+				    enum dma_data_direction direction)
+{
+}
+
+static int dma_direct_map_sg(struct device *dev, struct scatterlist *sg,
+			     int nents, enum dma_data_direction direction)
+{
+	int i;
+
+	for (i = 0; i < nents; i++, sg++) {
+		sg->dma_address = page_to_phys(sg->page) + sg->offset;
+		sg->dma_length = sg->length;
+	}
+
+	return nents;
+}
+
+static void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sg,
+				int nents, enum dma_data_direction direction)
+{
+}
+
+static int dma_direct_dma_supported(struct device *dev, u64 mask)
+{
+	/* Could be improved to check for memory though it better be
+	 * done via some global so platforms can set the limit in case
+	 * they have limited DMA windows
+	 */
+	return mask >= DMA_32BIT_MASK;
+}
+
+struct dma_mapping_ops dma_direct_ops = {
+	.alloc_coherent	= dma_direct_alloc_coherent,
+	.free_coherent	= dma_direct_free_coherent,
+	.map_single	= dma_direct_map_single,
+	.unmap_single	= dma_direct_unmap_single,
+	.map_sg		= dma_direct_map_sg,
+	.unmap_sg	= dma_direct_unmap_sg,
+	.dma_supported	= dma_direct_dma_supported,
+};
+EXPORT_SYMBOL(dma_direct_ops);
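
Bus code is expected to point each device at one of the op tables above via
dev->archdata. A hypothetical setup helper (the function name below is
illustrative only, not part of this patch) would look like:

	static void example_bus_setup_dma(struct device *dev,
					  struct iommu_table *tbl)
	{
		if (tbl) {
			/* Translate through the generic iommu ops */
			dev->archdata.dma_ops = &dma_iommu_ops;
			dev->archdata.dma_data = tbl;
		} else {
			/* 1:1 mapping through the direct ops */
			dev->archdata.dma_ops = &dma_direct_ops;
		}
	}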