[PATCH] sh: I/O routine cleanups and ioremap() overhaul

This introduces a few changes in the way that the I/O routines are defined on
SH, specifically so that things like the iomap API properly wrap through the
machvec for board-specific quirks.

In addition to this, the old p3_ioremap() work is converted to a more generic
__ioremap() that will map through the PMB if it's available, or fall back on
page tables for everything else.

An alpha-like IO_CONCAT is also added so we can start to clean up the
board-specific io.h mess, which will be handled in board update patches..

Signed-off-by: Paul Mundt <lethal@linux-sh.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/sh/kernel/io.c b/arch/sh/kernel/io.c
index d9932f2..71c9fde 100644
--- a/arch/sh/kernel/io.c
+++ b/arch/sh/kernel/io.c
@@ -2,58 +2,73 @@
  * linux/arch/sh/kernel/io.c
  *
  * Copyright (C) 2000  Stuart Menefy
+ * Copyright (C) 2005  Paul Mundt
  *
  * Provide real functions which expand to whatever the header file defined.
  * Also definitions of machine independent IO functions.
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
  */
-
-#include <asm/io.h>
 #include <linux/module.h>
+#include <asm/machvec.h>
+#include <asm/io.h>
 
 /*
  * Copy data from IO memory space to "real" memory space.
  * This needs to be optimized.
  */
-void  memcpy_fromio(void * to, unsigned long from, unsigned long count)
+void memcpy_fromio(void *to, volatile void __iomem *from, unsigned long count)
 {
 	char *p = to;
         while (count) {
                 count--;
-                *p = readb(from);
+                *p = readb((void __iomem *)from);
                 p++;
                 from++;
         }
 }
- 
+EXPORT_SYMBOL(memcpy_fromio);
+
 /*
  * Copy data from "real" memory space to IO memory space.
  * This needs to be optimized.
  */
-void  memcpy_toio(unsigned long to, const void * from, unsigned long count)
+void memcpy_toio(volatile void __iomem *to, const void *from, unsigned long count)
 {
 	const char *p = from;
         while (count) {
                 count--;
-                writeb(*p, to);
+                writeb(*p, (void __iomem *)to);
                 p++;
                 to++;
         }
 }
- 
+EXPORT_SYMBOL(memcpy_toio);
+
 /*
  * "memset" on IO memory space.
  * This needs to be optimized.
  */
-void  memset_io(unsigned long dst, int c, unsigned long count)
+void memset_io(volatile void __iomem *dst, int c, unsigned long count)
 {
         while (count) {
                 count--;
-                writeb(c, dst);
+                writeb(c, (void __iomem *)dst);
                 dst++;
         }
 }
-
-EXPORT_SYMBOL(memcpy_fromio);
-EXPORT_SYMBOL(memcpy_toio);
 EXPORT_SYMBOL(memset_io);
 
+void __iomem *ioport_map(unsigned long port, unsigned int nr)
+{
+	return sh_mv.mv_ioport_map(port, nr);
+}
+EXPORT_SYMBOL(ioport_map);
+
+void ioport_unmap(void __iomem *addr)
+{
+	sh_mv.mv_ioport_unmap(addr);
+}
+EXPORT_SYMBOL(ioport_unmap);
diff --git a/arch/sh/kernel/io_generic.c b/arch/sh/kernel/io_generic.c
index a911b01..28ec748 100644
--- a/arch/sh/kernel/io_generic.c
+++ b/arch/sh/kernel/io_generic.c
@@ -3,6 +3,7 @@
  * linux/arch/sh/kernel/io_generic.c
  *
  * Copyright (C) 2000  Niibe Yutaka
+ * Copyright (C) 2005  Paul Mundt
  *
  * Generic I/O routine. These can be used where a machine specific version
  * is not required.
@@ -10,21 +11,20 @@
  * This file is subject to the terms and conditions of the GNU General Public
  * License.  See the file "COPYING" in the main directory of this archive
  * for more details.
- *
  */
-
+#include <linux/module.h>
 #include <asm/io.h>
 #include <asm/machvec.h>
-#include <linux/module.h>
 
-#if defined(CONFIG_CPU_SH3)
+#ifdef CONFIG_CPU_SH3
+/* SH3 has a PCMCIA bug that needs a dummy read from area 6 for a
+ * workaround. */
 /* I'm not sure SH7709 has this kind of bug */
-#define SH3_PCMCIA_BUG_WORKAROUND 1
-#define DUMMY_READ_AREA6	  0xba000000
+#define dummy_read()	ctrl_inb(0xba000000)
+#else
+#define dummy_read()
 #endif
 
-#define PORT2ADDR(x) (sh_mv.mv_isa_port2addr(x))
-
 unsigned long generic_io_base;
 
 static inline void delay(void)
@@ -32,40 +32,40 @@
 	ctrl_inw(0xa0000000);
 }
 
-unsigned char generic_inb(unsigned long port)
+u8 generic_inb(unsigned long port)
 {
-	return *(volatile unsigned char*)PORT2ADDR(port);
+	return ctrl_inb((unsigned long __force)ioport_map(port, 1));
 }
 
-unsigned short generic_inw(unsigned long port)
+u16 generic_inw(unsigned long port)
 {
-	return *(volatile unsigned short*)PORT2ADDR(port);
+	return ctrl_inw((unsigned long __force)ioport_map(port, 2));
 }
 
-unsigned int generic_inl(unsigned long port)
+u32 generic_inl(unsigned long port)
 {
-	return *(volatile unsigned long*)PORT2ADDR(port);
+	return ctrl_inl((unsigned long __force)ioport_map(port, 4));
 }
 
-unsigned char generic_inb_p(unsigned long port)
+u8 generic_inb_p(unsigned long port)
 {
-	unsigned long v = *(volatile unsigned char*)PORT2ADDR(port);
+	unsigned long v = generic_inb(port);
 
 	delay();
 	return v;
 }
 
-unsigned short generic_inw_p(unsigned long port)
+u16 generic_inw_p(unsigned long port)
 {
-	unsigned long v = *(volatile unsigned short*)PORT2ADDR(port);
+	unsigned long v = generic_inw(port);
 
 	delay();
 	return v;
 }
 
-unsigned int generic_inl_p(unsigned long port)
+u32 generic_inl_p(unsigned long port)
 {
-	unsigned long v = *(volatile unsigned long*)PORT2ADDR(port);
+	unsigned long v = generic_inl(port);
 
 	delay();
 	return v;
@@ -77,75 +77,70 @@
  * convert the port address to real address once.
  */
 
-void generic_insb(unsigned long port, void *buffer, unsigned long count)
+void generic_insb(unsigned long port, void *dst, unsigned long count)
 {
-	volatile unsigned char *port_addr;
-	unsigned char *buf=buffer;
+	volatile u8 *port_addr;
+	u8 *buf = dst;
 
-	port_addr = (volatile unsigned char *)PORT2ADDR(port);
-
-	while(count--)
-	    *buf++ = *port_addr;
+	port_addr = (volatile u8 *)ioport_map(port, 1);
+	while (count--)
+		*buf++ = *port_addr;
 }
 
-void generic_insw(unsigned long port, void *buffer, unsigned long count)
+void generic_insw(unsigned long port, void *dst, unsigned long count)
 {
-	volatile unsigned short *port_addr;
-	unsigned short *buf=buffer;
+	volatile u16 *port_addr;
+	u16 *buf = dst;
 
-	port_addr = (volatile unsigned short *)PORT2ADDR(port);
+	port_addr = (volatile u16 *)ioport_map(port, 2);
+	while (count--)
+		*buf++ = *port_addr;
 
-	while(count--)
-	    *buf++ = *port_addr;
-#ifdef SH3_PCMCIA_BUG_WORKAROUND
-	ctrl_inb (DUMMY_READ_AREA6);
-#endif
+	dummy_read();
 }
 
-void generic_insl(unsigned long port, void *buffer, unsigned long count)
+void generic_insl(unsigned long port, void *dst, unsigned long count)
 {
-	volatile unsigned long *port_addr;
-	unsigned long *buf=buffer;
+	volatile u32 *port_addr;
+	u32 *buf = dst;
 
-	port_addr = (volatile unsigned long *)PORT2ADDR(port);
+	port_addr = (volatile u32 *)ioport_map(port, 4);
+	while (count--)
+		*buf++ = *port_addr;
 
-	while(count--)
-	    *buf++ = *port_addr;
-#ifdef SH3_PCMCIA_BUG_WORKAROUND
-	ctrl_inb (DUMMY_READ_AREA6);
-#endif
+	dummy_read();
 }
 
-void generic_outb(unsigned char b, unsigned long port)
+void generic_outb(u8 b, unsigned long port)
 {
-	*(volatile unsigned char*)PORT2ADDR(port) = b;
+	ctrl_outb(b, (unsigned long __force)ioport_map(port, 1));
 }
 
-void generic_outw(unsigned short b, unsigned long port)
+void generic_outw(u16 b, unsigned long port)
 {
-	*(volatile unsigned short*)PORT2ADDR(port) = b;
+	ctrl_outw(b, (unsigned long __force)ioport_map(port, 2));
 }
 
-void generic_outl(unsigned int b, unsigned long port)
+void generic_outl(u32 b, unsigned long port)
 {
-        *(volatile unsigned long*)PORT2ADDR(port) = b;
+	ctrl_outl(b, (unsigned long __force)ioport_map(port, 4));
 }
 
-void generic_outb_p(unsigned char b, unsigned long port)
+void generic_outb_p(u8 b, unsigned long port)
 {
-	*(volatile unsigned char*)PORT2ADDR(port) = b;
+	generic_outb(b, port);
 	delay();
 }
 
-void generic_outw_p(unsigned short b, unsigned long port)
+void generic_outw_p(u16 b, unsigned long port)
 {
-	*(volatile unsigned short*)PORT2ADDR(port) = b;
+	generic_outw(b, port);
 	delay();
 }
 
-void generic_outl_p(unsigned int b, unsigned long port)
+void generic_outl_p(u32 b, unsigned long port)
 {
-	*(volatile unsigned long*)PORT2ADDR(port) = b;
+	generic_outl(b, port);
 	delay();
 }
 
@@ -154,90 +149,77 @@
  * address. However as the port address doesn't change we only need to
  * convert the port address to real address once.
  */
-
-void generic_outsb(unsigned long port, const void *buffer, unsigned long count)
+void generic_outsb(unsigned long port, const void *src, unsigned long count)
 {
-	volatile unsigned char *port_addr;
-	const unsigned char *buf=buffer;
+	volatile u8 *port_addr;
+	const u8 *buf = src;
 
-	port_addr = (volatile unsigned char *)PORT2ADDR(port);
+	port_addr = (volatile u8 __force *)ioport_map(port, 1);
 
-	while(count--)
-	    *port_addr = *buf++;
+	while (count--)
+		*port_addr = *buf++;
 }
 
-void generic_outsw(unsigned long port, const void *buffer, unsigned long count)
+void generic_outsw(unsigned long port, const void *src, unsigned long count)
 {
-	volatile unsigned short *port_addr;
-	const unsigned short *buf=buffer;
+	volatile u16 *port_addr;
+	const u16 *buf = src;
 
-	port_addr = (volatile unsigned short *)PORT2ADDR(port);
+	port_addr = (volatile u16 __force *)ioport_map(port, 2);
 
-	while(count--)
-	    *port_addr = *buf++;
+	while (count--)
+		*port_addr = *buf++;
 
-#ifdef SH3_PCMCIA_BUG_WORKAROUND
-	ctrl_inb (DUMMY_READ_AREA6);
-#endif
+	dummy_read();
 }
 
-void generic_outsl(unsigned long port, const void *buffer, unsigned long count)
+void generic_outsl(unsigned long port, const void *src, unsigned long count)
 {
-	volatile unsigned long *port_addr;
-	const unsigned long *buf=buffer;
+	volatile u32 *port_addr;
+	const u32 *buf = src;
 
-	port_addr = (volatile unsigned long *)PORT2ADDR(port);
+	port_addr = (volatile u32 __force *)ioport_map(port, 4);
+	while (count--)
+		*port_addr = *buf++;
 
-	while(count--)
-	    *port_addr = *buf++;
-
-#ifdef SH3_PCMCIA_BUG_WORKAROUND
-	ctrl_inb (DUMMY_READ_AREA6);
-#endif
+	dummy_read();
 }
 
-unsigned char generic_readb(unsigned long addr)
+u8 generic_readb(void __iomem *addr)
 {
-	return *(volatile unsigned char*)addr;
+	return ctrl_inb((unsigned long __force)addr);
 }
 
-unsigned short generic_readw(unsigned long addr)
+u16 generic_readw(void __iomem *addr)
 {
-	return *(volatile unsigned short*)addr;
+	return ctrl_inw((unsigned long __force)addr);
 }
 
-unsigned int generic_readl(unsigned long addr)
+u32 generic_readl(void __iomem *addr)
 {
-	return *(volatile unsigned long*)addr;
+	return ctrl_inl((unsigned long __force)addr);
 }
 
-void generic_writeb(unsigned char b, unsigned long addr)
+void generic_writeb(u8 b, void __iomem *addr)
 {
-	*(volatile unsigned char*)addr = b;
+	ctrl_outb(b, (unsigned long __force)addr);
 }
 
-void generic_writew(unsigned short b, unsigned long addr)
+void generic_writew(u16 b, void __iomem *addr)
 {
-	*(volatile unsigned short*)addr = b;
+	ctrl_outw(b, (unsigned long __force)addr);
 }
 
-void generic_writel(unsigned int b, unsigned long addr)
+void generic_writel(u32 b, void __iomem *addr)
 {
-        *(volatile unsigned long*)addr = b;
+	ctrl_outl(b, (unsigned long __force)addr);
 }
 
-void * generic_ioremap(unsigned long offset, unsigned long size)
+void __iomem *generic_ioport_map(unsigned long addr, unsigned int size)
 {
-	return (void *) P2SEGADDR(offset);
+	return (void __iomem *)(addr + generic_io_base);
 }
-EXPORT_SYMBOL(generic_ioremap);
 
-void generic_iounmap(void *addr)
+void generic_ioport_unmap(void __iomem *addr)
 {
 }
-EXPORT_SYMBOL(generic_iounmap);
-
-unsigned long generic_isa_port2addr(unsigned long offset)
-{
-	return offset + generic_io_base;
-}
diff --git a/arch/sh/mm/ioremap.c b/arch/sh/mm/ioremap.c
index e794e27..96fa4a9 100644
--- a/arch/sh/mm/ioremap.c
+++ b/arch/sh/mm/ioremap.c
@@ -6,13 +6,19 @@
  * 640k-1MB IO memory area on PC's
  *
  * (C) Copyright 1995 1996 Linus Torvalds
+ * (C) Copyright 2005, 2006 Paul Mundt
+ *
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of this
+ * archive for more details.
  */
-
 #include <linux/vmalloc.h>
+#include <linux/module.h>
 #include <linux/mm.h>
 #include <asm/io.h>
 #include <asm/page.h>
 #include <asm/pgalloc.h>
+#include <asm/addrspace.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
@@ -80,9 +86,15 @@
 	if (address >= end)
 		BUG();
 	do {
+		pud_t *pud;
 		pmd_t *pmd;
-		pmd = pmd_alloc(&init_mm, dir, address);
+
 		error = -ENOMEM;
+
+		pud = pud_alloc(&init_mm, dir, address);
+		if (!pud)
+			break;
+		pmd = pmd_alloc(&init_mm, pud, address);
 		if (!pmd)
 			break;
 		if (remap_area_pmd(pmd, address, end - address,
@@ -97,10 +109,6 @@
 }
 
 /*
- * Generic mapping function (not visible outside):
- */
-
-/*
  * Remap an arbitrary physical address space into the kernel virtual
  * address space. Needed when the kernel wants to access high addresses
  * directly.
@@ -109,11 +117,11 @@
  * have to convert them into an offset in a page-aligned mapping, but the
  * caller shouldn't need to know that small detail.
  */
-void * p3_ioremap(unsigned long phys_addr, unsigned long size, unsigned long flags)
+void __iomem *__ioremap(unsigned long phys_addr, unsigned long size,
+			unsigned long flags)
 {
-	void * addr;
 	struct vm_struct * area;
-	unsigned long offset, last_addr;
+	unsigned long offset, last_addr, addr, orig_addr;
 
 	/* Don't allow wraparound or zero size */
 	last_addr = phys_addr + size - 1;
@@ -124,7 +132,7 @@
 	 * Don't remap the low PCI/ISA area, it's always mapped..
 	 */
 	if (phys_addr >= 0xA0000 && last_addr < 0x100000)
-		return phys_to_virt(phys_addr);
+		return (void __iomem *)phys_to_virt(phys_addr);
 
 	/*
 	 * Don't allow anybody to remap normal RAM that we're using..
@@ -146,16 +154,71 @@
 	if (!area)
 		return NULL;
 	area->phys_addr = phys_addr;
-	addr = area->addr;
-	if (remap_area_pages((unsigned long) addr, phys_addr, size, flags)) {
-		vunmap(addr);
-		return NULL;
-	}
-	return (void *) (offset + (char *)addr);
-}
+	orig_addr = addr = (unsigned long)area->addr;
 
-void p3_iounmap(void *addr)
-{
-	if (addr > high_memory)
-		vfree((void *)(PAGE_MASK & (unsigned long)addr));
+#ifdef CONFIG_32BIT
+	/*
+	 * First try to remap through the PMB once a valid VMA has been
+	 * established. Smaller allocations (or the rest of the size
+	 * remaining after a PMB mapping due to the size not being
+	 * perfectly aligned on a PMB size boundary) are then mapped
+	 * through the UTLB using conventional page tables.
+	 *
+	 * PMB entries are all pre-faulted.
+	 */
+	if (unlikely(size >= 0x1000000)) {
+		unsigned long mapped = pmb_remap(addr, phys_addr, size, flags);
+
+		if (likely(mapped)) {
+			addr		+= mapped;
+			phys_addr	+= mapped;
+			size		-= mapped;
+		}
+	}
+#endif
+
+	if (likely(size))
+		if (remap_area_pages(addr, phys_addr, size, flags)) {
+			vunmap((void *)orig_addr);
+			return NULL;
+		}
+
+	return (void __iomem *)(offset + (char *)orig_addr);
 }
+EXPORT_SYMBOL(__ioremap);
+
+void __iounmap(void __iomem *addr)
+{
+	unsigned long vaddr = (unsigned long __force)addr;
+	struct vm_struct *p;
+
+	if (PXSEG(vaddr) < P3SEG)
+		return;
+
+#ifdef CONFIG_32BIT
+	/*
+	 * Purge any PMB entries that may have been established for this
+	 * mapping, then proceed with conventional VMA teardown.
+	 *
+	 * XXX: Note that due to the way that remove_vm_area() does
+	 * matching of the resultant VMA, we aren't able to fast-forward
+	 * the address past the PMB space until the end of the VMA where
+	 * the page tables reside. As such, unmap_vm_area() will be
+	 * forced to linearly scan over the area until it finds the page
+	 * tables where PTEs that need to be unmapped actually reside,
+	 * which is far from optimal. Perhaps we need to use a separate
+	 * VMA for the PMB mappings?
+	 *					-- PFM.
+	 */
+	pmb_unmap(vaddr);
+#endif
+
+	p = remove_vm_area((void *)(vaddr & PAGE_MASK));
+	if (!p) {
+		printk(KERN_ERR "%s: bad address %p\n", __FUNCTION__, addr);
+		return;
+	}
+
+	kfree(p);
+}
+EXPORT_SYMBOL(__iounmap);