Linux-2.6.12-rc2

Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.

Let it rip!
diff --git a/arch/alpha/mm/Makefile b/arch/alpha/mm/Makefile
new file mode 100644
index 0000000..6edd9a0
--- /dev/null
+++ b/arch/alpha/mm/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the linux alpha-specific parts of the memory manager.
+#
+
+EXTRA_CFLAGS := -Werror
+
+obj-y	:= init.o fault.o extable.o remap.o
+
+obj-$(CONFIG_DISCONTIGMEM) += numa.o
diff --git a/arch/alpha/mm/extable.c b/arch/alpha/mm/extable.c
new file mode 100644
index 0000000..c3849ba
--- /dev/null
+++ b/arch/alpha/mm/extable.c
@@ -0,0 +1,39 @@
+/*
+ * linux/arch/alpha/mm/extable.c
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <asm/uaccess.h>
+
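+/* The exception table is assumed to be sorted at link time, so there
+   is nothing to do here.  */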
+void sort_extable(struct exception_table_entry *start,
+		  struct exception_table_entry *finish)
+{
+}
+
+const struct exception_table_entry *
+search_extable(const struct exception_table_entry *first,
+	       const struct exception_table_entry *last,
+	       unsigned long value)
+{
+	while (first <= last) {
+		const struct exception_table_entry *mid;
+		unsigned long mid_value;
+
+		mid = (last - first) / 2 + first;
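+		/* The ->insn field stores the faulting pc as an offset
+		   relative to the field's own address; adding the two
+		   recovers the absolute address.  */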
+		mid_value = (unsigned long)&mid->insn + mid->insn;
+		if (mid_value == value)
+			return mid;
+		else if (mid_value < value)
+			first = mid+1;
+		else
+			last = mid-1;
+	}
+
+	return NULL;
+}
diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c
new file mode 100644
index 0000000..64ace5a
--- /dev/null
+++ b/arch/alpha/mm/fault.c
@@ -0,0 +1,261 @@
+/*
+ *  linux/arch/alpha/mm/fault.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+#include <linux/config.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <asm/io.h>
+
+#define __EXTERN_INLINE inline
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#undef  __EXTERN_INLINE
+
+#include <linux/signal.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/smp.h>
+#include <linux/smp_lock.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+
+extern void die_if_kernel(char *, struct pt_regs *, long, unsigned long *);
+
+
+/*
+ * Force a new ASN for a task.
+ */
+
+#ifndef CONFIG_SMP
+unsigned long last_asn = ASN_FIRST_VERSION;
+#endif
+
+void
+__load_new_mm_context(struct mm_struct *next_mm)
+{
+	unsigned long mmc;
+	struct pcb_struct *pcb;
+
+	mmc = __get_new_mm_context(next_mm, smp_processor_id());
+	next_mm->context[smp_processor_id()] = mmc;
+
+	pcb = &current_thread_info()->pcb;
+	pcb->asn = mmc & HARDWARE_ASN_MASK;
+	pcb->ptbr = ((unsigned long) next_mm->pgd - IDENT_ADDR) >> PAGE_SHIFT;
+
+	__reload_thread(pcb);
+}
+
+
+/*
+ * This routine handles page faults.  It determines the address,
+ * and the problem, and then passes it off to handle_mm_fault().
+ *
+ * mmcsr:
+ *	0 = translation not valid
+ *	1 = access violation
+ *	2 = fault-on-read
+ *	3 = fault-on-execute
+ *	4 = fault-on-write
+ *
+ * cause:
+ *	-1 = instruction fetch
+ *	0 = load
+ *	1 = store
+ *
+ * Registers $9 through $15 are saved in a block just prior to `regs' and
+ * are saved and restored around the call to allow exception code to
+ * modify them.
+ */
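+
+/* For example, a store to an unmapped user address traps with
+   mmcsr == 0 (translation not valid) and cause == 1 (store), while an
+   instruction fetch from a non-executable mapping arrives with
+   cause == -1 and is rejected by the VM_EXEC check below.  */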
+
+/* Macro for exception fixup code to access integer registers.  */
+#define dpf_reg(r)							\
+	(((unsigned long *)regs)[(r) <= 8 ? (r) : (r) <= 15 ? (r)-16 :	\
+				 (r) <= 18 ? (r)+8 : (r)-10])
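+
+/* For example, dpf_reg(9) resolves to ((unsigned long *)regs)[-7],
+   the first slot of the $9..$15 block saved just below regs; other
+   register numbers index into pt_regs itself.  */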
+
+asmlinkage void
+do_page_fault(unsigned long address, unsigned long mmcsr,
+	      long cause, struct pt_regs *regs)
+{
+	struct vm_area_struct * vma;
+	struct mm_struct *mm = current->mm;
+	const struct exception_table_entry *fixup;
+	int fault, si_code = SEGV_MAPERR;
+	siginfo_t info;
+
+	/* As of EV6, a load into $31/$f31 is a prefetch, and never faults
+	   (or is suppressed by the PALcode).  Support that for older CPUs
+	   by ignoring such an instruction.  */
+	if (cause == 0) {
+		unsigned int insn;
+		__get_user(insn, (unsigned int __user *)regs->pc);
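+		/* Bits 26-31 hold the opcode and bits 21-25 the target
+		   register; 0x1f is $31/$f31.  Each set bit in the mask
+		   below selects one of the load opcodes listed:
+		   0x0a/0x0c (ldbu/ldwu), 0x20-0x23 (ldf/ldg/lds/ldt),
+		   0x28-0x29 (ldl/ldq).  */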
+		if ((insn >> 21 & 0x1f) == 0x1f &&
+		    /* ldq ldl ldt lds ldg ldf ldwu ldbu */
+		    (1ul << (insn >> 26) & 0x30f00001400ul)) {
+			regs->pc += 4;
+			return;
+		}
+	}
+
+	/* If we're in an interrupt context, or have no user context,
+	   we must not take the fault.  */
+	if (!mm || in_interrupt())
+		goto no_context;
+
+#ifdef CONFIG_ALPHA_LARGE_VMALLOC
+	if (address >= TASK_SIZE)
+		goto vmalloc_fault;
+#endif
+
+	down_read(&mm->mmap_sem);
+	vma = find_vma(mm, address);
+	if (!vma)
+		goto bad_area;
+	if (vma->vm_start <= address)
+		goto good_area;
+	if (!(vma->vm_flags & VM_GROWSDOWN))
+		goto bad_area;
+	if (expand_stack(vma, address))
+		goto bad_area;
+
+	/* Ok, we have a good vm_area for this memory access, so
+	   we can handle it.  */
+ good_area:
+	si_code = SEGV_ACCERR;
+	if (cause < 0) {
+		if (!(vma->vm_flags & VM_EXEC))
+			goto bad_area;
+	} else if (!cause) {
+		/* Allow reads even for write-only mappings */
+		if (!(vma->vm_flags & (VM_READ | VM_WRITE)))
+			goto bad_area;
+	} else {
+		if (!(vma->vm_flags & VM_WRITE))
+			goto bad_area;
+	}
+
+ survive:
+	/* If for any reason at all we couldn't handle the fault,
+	   make sure we exit gracefully rather than endlessly redo
+	   the fault.  */
+	fault = handle_mm_fault(mm, vma, address, cause > 0);
+	up_read(&mm->mmap_sem);
+
+	switch (fault) {
+	      case VM_FAULT_MINOR:
+		current->min_flt++;
+		break;
+	      case VM_FAULT_MAJOR:
+		current->maj_flt++;
+		break;
+	      case VM_FAULT_SIGBUS:
+		goto do_sigbus;
+	      case VM_FAULT_OOM:
+		goto out_of_memory;
+	      default:
+		BUG();
+	}
+	return;
+
+	/* Something tried to access memory that isn't in our memory map.
+	   Fix it, but check if it's kernel or user first.  */
+ bad_area:
+	up_read(&mm->mmap_sem);
+
+	if (user_mode(regs))
+		goto do_sigsegv;
+
+ no_context:
+	/* Are we prepared to handle this fault as an exception?  */
+	if ((fixup = search_exception_tables(regs->pc)) != 0) {
+		unsigned long newpc;
+		newpc = fixup_exception(dpf_reg, fixup, regs->pc);
+		regs->pc = newpc;
+		return;
+	}
+
+	/* Oops. The kernel tried to access some bad page. We'll have to
+	   terminate things with extreme prejudice.  */
+	printk(KERN_ALERT "Unable to handle kernel paging request at "
+	       "virtual address %016lx\n", address);
+	die_if_kernel("Oops", regs, cause, (unsigned long*)regs - 16);
+	do_exit(SIGKILL);
+
+	/* We ran out of memory, or some other thing happened to us that
+	   made us unable to handle the page fault gracefully.  */
+ out_of_memory:
+	if (current->pid == 1) {
+		yield();
+		down_read(&mm->mmap_sem);
+		goto survive;
+	}
+	printk(KERN_ALERT "VM: killing process %s(%d)\n",
+	       current->comm, current->pid);
+	if (!user_mode(regs))
+		goto no_context;
+	do_exit(SIGKILL);
+
+ do_sigbus:
+	/* Send a sigbus, regardless of whether we were in kernel
+	   or user mode.  */
+	info.si_signo = SIGBUS;
+	info.si_errno = 0;
+	info.si_code = BUS_ADRERR;
+	info.si_addr = (void __user *) address;
+	force_sig_info(SIGBUS, &info, current);
+	if (!user_mode(regs))
+		goto no_context;
+	return;
+
+ do_sigsegv:
+	info.si_signo = SIGSEGV;
+	info.si_errno = 0;
+	info.si_code = si_code;
+	info.si_addr = (void __user *) address;
+	force_sig_info(SIGSEGV, &info, current);
+	return;
+
+#ifdef CONFIG_ALPHA_LARGE_VMALLOC
+ vmalloc_fault:
+	if (user_mode(regs))
+		goto do_sigsegv;
+	else {
+		/* Synchronize this task's top level page-table
+		   with the "reference" page table from init.  */
+		long index = pgd_index(address);
+		pgd_t *pgd, *pgd_k;
+
+		pgd = current->active_mm->pgd + index;
+		pgd_k = swapper_pg_dir + index;
+		if (!pgd_present(*pgd) && pgd_present(*pgd_k)) {
+			pgd_val(*pgd) = pgd_val(*pgd_k);
+			return;
+		}
+		goto no_context;
+	}
+#endif
+}
diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c
new file mode 100644
index 0000000..90752f6
--- /dev/null
+++ b/arch/alpha/mm/init.c
@@ -0,0 +1,396 @@
+/*
+ *  linux/arch/alpha/mm/init.c
+ *
+ *  Copyright (C) 1995  Linus Torvalds
+ */
+
+/* 2.3.x zone allocator, 1999 Andrea Arcangeli <andrea@suse.de> */
+
+#include <linux/config.h>
+#include <linux/signal.h>
+#include <linux/sched.h>
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/mman.h>
+#include <linux/mm.h>
+#include <linux/swap.h>
+#include <linux/init.h>
+#include <linux/bootmem.h> /* max_low_pfn */
+#include <linux/vmalloc.h>
+
+#include <asm/system.h>
+#include <asm/uaccess.h>
+#include <asm/pgtable.h>
+#include <asm/pgalloc.h>
+#include <asm/hwrpb.h>
+#include <asm/dma.h>
+#include <asm/mmu_context.h>
+#include <asm/console.h>
+#include <asm/tlb.h>
+
+DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+extern void die_if_kernel(char *, struct pt_regs *, long);
+
+static struct pcb_struct original_pcb;
+
+pgd_t *
+pgd_alloc(struct mm_struct *mm)
+{
+	pgd_t *ret, *init;
+
+	ret = (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
+	init = pgd_offset(&init_mm, 0UL);
+	if (ret) {
+#ifdef CONFIG_ALPHA_LARGE_VMALLOC
+		memcpy (ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD,
+			(PTRS_PER_PGD - USER_PTRS_PER_PGD - 1)*sizeof(pgd_t));
+#else
+		pgd_val(ret[PTRS_PER_PGD-2]) = pgd_val(init[PTRS_PER_PGD-2]);
+#endif
+
+		/* The last PGD entry is the VPTB self-map.  */
+		pgd_val(ret[PTRS_PER_PGD-1])
+		  = pte_val(mk_pte(virt_to_page(ret), PAGE_KERNEL));
+	}
+	return ret;
+}
+
+pte_t *
+pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address)
+{
+	pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO);
+	return pte;
+}
+
+
+/*
+ * BAD_PAGE is the page that is used for page faults when linux
+ * is out-of-memory. Older versions of linux just did a
+ * do_exit(), but using this instead means there is less risk
+ * for a process dying in kernel mode, possibly leaving an inode
+ * unused etc..
+ *
+ * BAD_PAGETABLE is the accompanying page-table: it is initialized
+ * to point to BAD_PAGE entries.
+ *
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+pmd_t *
+__bad_pagetable(void)
+{
+	memset((void *) EMPTY_PGT, 0, PAGE_SIZE);
+	return (pmd_t *) EMPTY_PGT;
+}
+
+pte_t
+__bad_page(void)
+{
+	memset((void *) EMPTY_PGE, 0, PAGE_SIZE);
+	return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED));
+}
+
+#ifndef CONFIG_DISCONTIGMEM
+void
+show_mem(void)
+{
+	long i, free = 0, total = 0, reserved = 0;
+	long shared = 0, cached = 0;
+
+	printk("\nMem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	i = max_mapnr;
+	while (i-- > 0) {
+		total++;
+		if (PageReserved(mem_map+i))
+			reserved++;
+		else if (PageSwapCache(mem_map+i))
+			cached++;
+		else if (!page_count(mem_map+i))
+			free++;
+		else
+			shared += page_count(mem_map + i) - 1;
+	}
+	printk("%ld pages of RAM\n", total);
+	printk("%ld free pages\n", free);
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
+#endif
+
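+/* Record the current kernel stack pointer in the PCB ($30 is the
+   stack pointer on alpha) and make this PCB the active one.  */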
+static inline unsigned long
+load_PCB(struct pcb_struct *pcb)
+{
+	register unsigned long sp __asm__("$30");
+	pcb->ksp = sp;
+	return __reload_thread(pcb);
+}
+
+/* Set up initial PCB, VPTB, and other such niceties.  */
+
+static inline void
+switch_to_system_map(void)
+{
+	unsigned long newptbr;
+	unsigned long original_pcb_ptr;
+
+	/* Initialize the kernel's page tables.  Linux puts the vptb in
+	   the last slot of the L1 page table.  */
+	memset(swapper_pg_dir, 0, PAGE_SIZE);
+	newptbr = ((unsigned long) swapper_pg_dir - PAGE_OFFSET) >> PAGE_SHIFT;
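+	/* With 8KB pages, each of the 1024 top-level slots maps
+	   1024*1024 pages = 8GB, so slot 1023 covers the top 8GB of the
+	   address space -- where the VPTB is pointed below.  The PFN
+	   sits in the high 32 bits of a PTE, hence the shift.  */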
+	pgd_val(swapper_pg_dir[1023]) =
+		(newptbr << 32) | pgprot_val(PAGE_KERNEL);
+
+	/* Set the vptb.  This is often done by the bootloader, but 
+	   shouldn't be required.  */
+	if (hwrpb->vptb != 0xfffffffe00000000UL) {
+		wrvptptr(0xfffffffe00000000UL);
+		hwrpb->vptb = 0xfffffffe00000000UL;
+		hwrpb_update_checksum(hwrpb);
+	}
+
+	/* Also set up the real kernel PCB while we're at it.  */
+	init_thread_info.pcb.ptbr = newptbr;
+	init_thread_info.pcb.flags = 1;	/* set FEN, clear everything else */
+	original_pcb_ptr = load_PCB(&init_thread_info.pcb);
+	tbia();
+
+	/* Save off the contents of the original PCB so that we can
+	   restore the original console's page tables for a clean reboot.
+
+	   Note that the PCB is supposed to be a physical address, but
+	   since KSEG values also happen to work, folks get confused.
+	   Check this here.  */
+
+	if (original_pcb_ptr < PAGE_OFFSET) {
+		original_pcb_ptr = (unsigned long)
+			phys_to_virt(original_pcb_ptr);
+	}
+	original_pcb = *(struct pcb_struct *) original_pcb_ptr;
+}
+
+int callback_init_done;
+
+void * __init
+callback_init(void * kernel_end)
+{
+	struct crb_struct * crb;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	void *two_pages;
+
+	/* Starting at the HWRPB, locate the CRB. */
+	crb = (struct crb_struct *)((char *)hwrpb + hwrpb->crb_offset);
+
+	if (alpha_using_srm) {
+		/* Tell the console whither it is to be remapped. */
+		if (srm_fixup(VMALLOC_START, (unsigned long)hwrpb))
+			__halt();		/* "We're boned."  --Bender */
+
+		/* Edit the procedure descriptors for DISPATCH and FIXUP. */
+		crb->dispatch_va = (struct procdesc_struct *)
+			(VMALLOC_START + (unsigned long)crb->dispatch_va
+			 - crb->map[0].va);
+		crb->fixup_va = (struct procdesc_struct *)
+			(VMALLOC_START + (unsigned long)crb->fixup_va
+			 - crb->map[0].va);
+	}
+
+	switch_to_system_map();
+
+	/* Allocate one PGD and one PMD.  In the case of SRM, we'll need
+	   these to actually remap the console.  There is an assumption
+	   here that only one of each is needed, and this allows for 8MB.
+	   On systems with larger consoles, additional pages will be
+	   allocated as needed during the mapping process.
+
+	   In the case of not SRM, but not CONFIG_ALPHA_LARGE_VMALLOC,
+	   we need to allocate the PGD we use for vmalloc before we start
+	   forking other tasks.  */
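+
+	/* Arithmetic sketch: one PTE page holds 1024 8-byte entries,
+	   each mapping an 8KB page, so a single PGD/PMD/PTE chain
+	   covers 1024 * 8KB = 8MB.  */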
+
+	two_pages = (void *)
+	  (((unsigned long)kernel_end + ~PAGE_MASK) & PAGE_MASK);
+	kernel_end = two_pages + 2*PAGE_SIZE;
+	memset(two_pages, 0, 2*PAGE_SIZE);
+
+	pgd = pgd_offset_k(VMALLOC_START);
+	pgd_set(pgd, (pmd_t *)two_pages);
+	pmd = pmd_offset(pgd, VMALLOC_START);
+	pmd_set(pmd, (pte_t *)(two_pages + PAGE_SIZE));
+
+	if (alpha_using_srm) {
+		static struct vm_struct console_remap_vm;
+		unsigned long vaddr = VMALLOC_START;
+		unsigned long i, j;
+
+		/* Set up the third level PTEs and update the virtual
+		   addresses of the CRB entries.  */
+		for (i = 0; i < crb->map_entries; ++i) {
+			unsigned long pfn = crb->map[i].pa >> PAGE_SHIFT;
+			crb->map[i].va = vaddr;
+			for (j = 0; j < crb->map[i].count; ++j) {
+				/* Newer consoles (especially on larger
+				   systems) may require more pages of
+				   PTEs. Grab additional pages as needed. */
+				if (pmd != pmd_offset(pgd, vaddr)) {
+					memset(kernel_end, 0, PAGE_SIZE);
+					pmd = pmd_offset(pgd, vaddr);
+					pmd_set(pmd, (pte_t *)kernel_end);
+					kernel_end += PAGE_SIZE;
+				}
+				set_pte(pte_offset_kernel(pmd, vaddr),
+					pfn_pte(pfn, PAGE_KERNEL));
+				pfn++;
+				vaddr += PAGE_SIZE;
+			}
+		}
+
+		/* Let vmalloc know that we've allocated some space.  */
+		console_remap_vm.flags = VM_ALLOC;
+		console_remap_vm.addr = (void *) VMALLOC_START;
+		console_remap_vm.size = vaddr - VMALLOC_START;
+		vmlist = &console_remap_vm;
+	}
+
+	callback_init_done = 1;
+	return kernel_end;
+}
+
+
+#ifndef CONFIG_DISCONTIGMEM
+/*
+ * paging_init() sets up the memory map.
+ */
+void
+paging_init(void)
+{
+	unsigned long zones_size[MAX_NR_ZONES] = {0, 0, 0};
+	unsigned long dma_pfn, high_pfn;
+
+	dma_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+	high_pfn = max_pfn = max_low_pfn;
+
+	if (dma_pfn >= high_pfn)
+		zones_size[ZONE_DMA] = high_pfn;
+	else {
+		zones_size[ZONE_DMA] = dma_pfn;
+		zones_size[ZONE_NORMAL] = high_pfn - dma_pfn;
+	}
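+	/* For instance, with high_pfn == 4096 and dma_pfn == 2048 this
+	   yields 2048 pages of ZONE_DMA and 2048 of ZONE_NORMAL.  */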
+
+	/* Initialize mem_map[].  */
+	free_area_init(zones_size);
+
+	/* Initialize the kernel's ZERO_PGE. */
+	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
+}
+#endif /* CONFIG_DISCONTIGMEM */
+
+#if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM)
+void
+srm_paging_stop (void)
+{
+	/* Move the vptb back to where the SRM console expects it.  */
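+	/* 0x200000000 is 8GB, i.e. top-level slot 1 (each slot spans
+	   8GB), so the self-map entry moves from slot 1023 to slot 1.  */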
+	swapper_pg_dir[1] = swapper_pg_dir[1023];
+	tbia();
+	wrvptptr(0x200000000UL);
+	hwrpb->vptb = 0x200000000UL;
+	hwrpb_update_checksum(hwrpb);
+
+	/* Reload the page tables that the console had in use.  */
+	load_PCB(&original_pcb);
+	tbia();
+}
+#endif
+
+#ifndef CONFIG_DISCONTIGMEM
+static void __init
+printk_memory_info(void)
+{
+	unsigned long codesize, reservedpages, datasize, initsize, tmp;
+	extern int page_is_ram(unsigned long) __init;
+	extern char _text, _etext, _data, _edata;
+	extern char __init_begin, __init_end;
+
+	/* Print all memory information.  */
+	reservedpages = 0;
+	for (tmp = 0; tmp < max_low_pfn; tmp++)
+		/*
+		 * Only count reserved RAM pages
+		 */
+		if (page_is_ram(tmp) && PageReserved(mem_map+tmp))
+			reservedpages++;
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_data;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, %luk data, %luk init)\n",
+	       (unsigned long) nr_free_pages() << (PAGE_SHIFT-10),
+	       max_mapnr << (PAGE_SHIFT-10),
+	       codesize >> 10,
+	       reservedpages << (PAGE_SHIFT-10),
+	       datasize >> 10,
+	       initsize >> 10);
+}
+
+void __init
+mem_init(void)
+{
+	max_mapnr = num_physpages = max_low_pfn;
+	totalram_pages += free_all_bootmem();
+	high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
+
+	printk_memory_info();
+}
+#endif /* CONFIG_DISCONTIGMEM */
+
+void
+free_reserved_mem(void *start, void *end)
+{
+	void *__start = start;
+	for (; __start < end; __start += PAGE_SIZE) {
+		ClearPageReserved(virt_to_page(__start));
+		set_page_count(virt_to_page(__start), 1);
+		free_page((long)__start);
+		totalram_pages++;
+	}
+}
+
+void
+free_initmem(void)
+{
+	extern char __init_begin, __init_end;
+
+	free_reserved_mem(&__init_begin, &__init_end);
+	printk ("Freeing unused kernel memory: %ldk freed\n",
+		(&__init_end - &__init_begin) >> 10);
+}
+
+#ifdef CONFIG_BLK_DEV_INITRD
+void
+free_initrd_mem(unsigned long start, unsigned long end)
+{
+	free_reserved_mem((void *)start, (void *)end);
+	printk ("Freeing initrd memory: %ldk freed\n", (end - start) >> 10);
+}
+#endif
diff --git a/arch/alpha/mm/numa.c b/arch/alpha/mm/numa.c
new file mode 100644
index 0000000..ba81c44
--- /dev/null
+++ b/arch/alpha/mm/numa.c
@@ -0,0 +1,399 @@
+/*
+ *  linux/arch/alpha/mm/numa.c
+ *
+ *  DISCONTIGMEM NUMA alpha support.
+ *
+ *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
+ */
+
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/swap.h>
+#include <linux/initrd.h>
+
+#include <asm/hwrpb.h>
+#include <asm/pgalloc.h>
+
+pg_data_t node_data[MAX_NUMNODES];
+bootmem_data_t node_bdata[MAX_NUMNODES];
+
+#undef DEBUG_DISCONTIG
+#ifdef DEBUG_DISCONTIG
+#define DBGDCONT(args...) printk(args)
+#else
+#define DBGDCONT(args...)
+#endif
+
+#define PFN_UP(x)       (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)     ((x) << PAGE_SHIFT)
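+/* With alpha's 8KB pages (PAGE_SHIFT == 13): PFN_UP(0x2001) == 2,
+   PFN_DOWN(0x3fff) == 1, PFN_PHYS(2) == 0x4000.  */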
+#define for_each_mem_cluster(memdesc, cluster, i)		\
+	for ((cluster) = (memdesc)->cluster, (i) = 0;		\
+	     (i) < (memdesc)->numclusters; (i)++, (cluster)++)
+
+static void __init show_mem_layout(void)
+{
+	struct memclust_struct * cluster;
+	struct memdesc_struct * memdesc;
+	int i;
+
+	/* Find free clusters, and init and free the bootmem accordingly.  */
+	memdesc = (struct memdesc_struct *)
+	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
+
+	printk("Raw memory layout:\n");
+	for_each_mem_cluster(memdesc, cluster, i) {
+		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
+		       i, cluster->usage, cluster->start_pfn,
+		       cluster->start_pfn + cluster->numpages);
+	}
+}
+
+static void __init
+setup_memory_node(int nid, void *kernel_end)
+{
+	extern unsigned long mem_size_limit;
+	struct memclust_struct * cluster;
+	struct memdesc_struct * memdesc;
+	unsigned long start_kernel_pfn, end_kernel_pfn;
+	unsigned long bootmap_size, bootmap_pages, bootmap_start;
+	unsigned long start, end;
+	unsigned long node_pfn_start, node_pfn_end;
+	unsigned long node_min_pfn, node_max_pfn;
+	int i;
+	unsigned long node_datasz = PFN_UP(sizeof(pg_data_t));
+	int show_init = 0;
+
+	/* Find the bounds of current node */
+	node_pfn_start = (node_mem_start(nid)) >> PAGE_SHIFT;
+	node_pfn_end = node_pfn_start + (node_mem_size(nid) >> PAGE_SHIFT);
+
+	/* Find free clusters, and init and free the bootmem accordingly.  */
+	memdesc = (struct memdesc_struct *)
+	  (hwrpb->mddt_offset + (unsigned long) hwrpb);
+
+	/* find the bounds of this node (node_min_pfn/node_max_pfn) */
+	node_min_pfn = ~0UL;
+	node_max_pfn = 0UL;
+	for_each_mem_cluster(memdesc, cluster, i) {
+		/* Bit 0 is console/PALcode reserved.  Bit 1 is
+		   non-volatile memory -- we might want to mark
+		   this for later.  */
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = start + cluster->numpages;
+
+		if (start >= node_pfn_end || end <= node_pfn_start)
+			continue;
+
+		if (!show_init) {
+			show_init = 1;
+			printk("Initializing bootmem allocator on Node ID %d\n", nid);
+		}
+		printk(" memcluster %2d, usage %1lx, start %8lu, end %8lu\n",
+		       i, cluster->usage, cluster->start_pfn,
+		       cluster->start_pfn + cluster->numpages);
+
+		if (start < node_pfn_start)
+			start = node_pfn_start;
+		if (end > node_pfn_end)
+			end = node_pfn_end;
+
+		if (start < node_min_pfn)
+			node_min_pfn = start;
+		if (end > node_max_pfn)
+			node_max_pfn = end;
+	}
+
+	if (mem_size_limit && node_max_pfn > mem_size_limit) {
+		static int msg_shown = 0;
+		if (!msg_shown) {
+			msg_shown = 1;
+			printk("setup: forcing memory size to %ldK (from %ldK).\n",
+			       mem_size_limit << (PAGE_SHIFT - 10),
+			       node_max_pfn    << (PAGE_SHIFT - 10));
+		}
+		node_max_pfn = mem_size_limit;
+	}
+
+	if (node_min_pfn >= node_max_pfn)
+		return;
+
+	/* Update global {min,max}_low_pfn from node information. */
+	if (node_min_pfn < min_low_pfn)
+		min_low_pfn = node_min_pfn;
+	if (node_max_pfn > max_low_pfn)
+		max_pfn = max_low_pfn = node_max_pfn;
+
+	num_physpages += node_max_pfn - node_min_pfn;
+
+#if 0 /* we'll try this one again in a little while */
+	/* Cute trick to make sure our local node data is on local memory */
+	node_data[nid] = (pg_data_t *)(__va(node_min_pfn << PAGE_SHIFT));
+#endif
+	/* Quasi-mark the pg_data_t as in-use */
+	node_min_pfn += node_datasz;
+	if (node_min_pfn >= node_max_pfn) {
+		printk(" not enough mem to reserve NODE_DATA\n");
+		return;
+	}
+	NODE_DATA(nid)->bdata = &node_bdata[nid];
+
+	printk(" Detected node memory:   start %8lu, end %8lu\n",
+	       node_min_pfn, node_max_pfn);
+
+	DBGDCONT(" DISCONTIG: node_data[%d]   is at 0x%p\n", nid, NODE_DATA(nid));
+	DBGDCONT(" DISCONTIG: NODE_DATA(%d)->bdata is at 0x%p\n", nid, NODE_DATA(nid)->bdata);
+
+	/* Find the bounds of kernel memory.  */
+	start_kernel_pfn = PFN_DOWN(KERNEL_START_PHYS);
+	end_kernel_pfn = PFN_UP(virt_to_phys(kernel_end));
+	bootmap_start = -1;
+
+	if (!nid && (node_max_pfn < end_kernel_pfn || node_min_pfn > start_kernel_pfn))
+		panic("kernel loaded out of ram");
+
+	/* Zone start phys-addr must be 2^(MAX_ORDER-1) aligned.
+	   Note that we round this down, not up - node memory
+	   has much larger alignment than 8MB, so it's safe. */
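+	/* (With the default MAX_ORDER of 11 this clears the low ten pfn
+	   bits: 1024 pages * 8KB = 8MB.)  */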
+	node_min_pfn &= ~((1UL << (MAX_ORDER-1))-1);
+
+	/* We need to know how many physically contiguous pages
+	   we'll need for the bootmap.  */
+	bootmap_pages = bootmem_bootmap_pages(node_max_pfn-node_min_pfn);
+
+	/* Now find a good region where to allocate the bootmap.  */
+	for_each_mem_cluster(memdesc, cluster, i) {
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = start + cluster->numpages;
+
+		if (start >= node_max_pfn || end <= node_min_pfn)
+			continue;
+
+		if (end > node_max_pfn)
+			end = node_max_pfn;
+		if (start < node_min_pfn)
+			start = node_min_pfn;
+
+		if (start < start_kernel_pfn) {
+			if (end > end_kernel_pfn
+			    && end - end_kernel_pfn >= bootmap_pages) {
+				bootmap_start = end_kernel_pfn;
+				break;
+			} else if (end > start_kernel_pfn)
+				end = start_kernel_pfn;
+		} else if (start < end_kernel_pfn)
+			start = end_kernel_pfn;
+		if (end - start >= bootmap_pages) {
+			bootmap_start = start;
+			break;
+		}
+	}
+
+	if (bootmap_start == -1)
+		panic("couldn't find a contiguous place for the bootmap");
+
+	/* Allocate the bootmap and mark the whole MM as reserved.  */
+	bootmap_size = init_bootmem_node(NODE_DATA(nid), bootmap_start,
+					 node_min_pfn, node_max_pfn);
+	DBGDCONT(" bootmap_start %lu, bootmap_size %lu, bootmap_pages %lu\n",
+		 bootmap_start, bootmap_size, bootmap_pages);
+
+	/* Mark the free regions.  */
+	for_each_mem_cluster(memdesc, cluster, i) {
+		if (cluster->usage & 3)
+			continue;
+
+		start = cluster->start_pfn;
+		end = cluster->start_pfn + cluster->numpages;
+
+		if (start >= node_max_pfn || end <= node_min_pfn)
+			continue;
+
+		if (end > node_max_pfn)
+			end = node_max_pfn;
+		if (start < node_min_pfn)
+			start = node_min_pfn;
+
+		if (start < start_kernel_pfn) {
+			if (end > end_kernel_pfn) {
+				free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start),
+					     (PFN_PHYS(start_kernel_pfn)
+					      - PFN_PHYS(start)));
+				printk(" freeing pages %ld:%ld\n",
+				       start, start_kernel_pfn);
+				start = end_kernel_pfn;
+			} else if (end > start_kernel_pfn)
+				end = start_kernel_pfn;
+		} else if (start < end_kernel_pfn)
+			start = end_kernel_pfn;
+		if (start >= end)
+			continue;
+
+		free_bootmem_node(NODE_DATA(nid), PFN_PHYS(start), PFN_PHYS(end) - PFN_PHYS(start));
+		printk(" freeing pages %ld:%ld\n", start, end);
+	}
+
+	/* Reserve the bootmap memory.  */
+	reserve_bootmem_node(NODE_DATA(nid), PFN_PHYS(bootmap_start), bootmap_size);
+	printk(" reserving pages %ld:%ld\n", bootmap_start, bootmap_start+PFN_UP(bootmap_size));
+
+	node_set_online(nid);
+}
+
+void __init
+setup_memory(void *kernel_end)
+{
+	int nid;
+
+	show_mem_layout();
+
+	nodes_clear(node_online_map);
+
+	min_low_pfn = ~0UL;
+	max_low_pfn = 0UL;
+	for (nid = 0; nid < MAX_NUMNODES; nid++)
+		setup_memory_node(nid, kernel_end);
+
+#ifdef CONFIG_BLK_DEV_INITRD
+	initrd_start = INITRD_START;
+	if (initrd_start) {
+		extern void *move_initrd(unsigned long);
+
+		initrd_end = initrd_start+INITRD_SIZE;
+		printk("Initial ramdisk at: 0x%p (%lu bytes)\n",
+		       (void *) initrd_start, INITRD_SIZE);
+
+		if ((void *)initrd_end > phys_to_virt(PFN_PHYS(max_low_pfn))) {
+			if (!move_initrd(PFN_PHYS(max_low_pfn)))
+				printk("initrd extends beyond end of memory "
+				       "(0x%08lx > 0x%p)\ndisabling initrd\n",
+				       initrd_end,
+				       phys_to_virt(PFN_PHYS(max_low_pfn)));
+		} else {
+			nid = kvaddr_to_nid(initrd_start);
+			reserve_bootmem_node(NODE_DATA(nid),
+					     virt_to_phys((void *)initrd_start),
+					     INITRD_SIZE);
+		}
+	}
+#endif /* CONFIG_BLK_DEV_INITRD */
+}
+
+void __init paging_init(void)
+{
+	unsigned int    nid;
+	unsigned long   zones_size[MAX_NR_ZONES] = {0, };
+	unsigned long	dma_local_pfn;
+
+	/*
+	 * The old global MAX_DMA_ADDRESS per-arch API doesn't fit
+	 * the NUMA model; for now we convert it to a pfn and
+	 * interpret that pfn as local per-node information.
+	 * This issue isn't very important since none of these machines
+	 * have legacy ISA slots anyway.
+	 */
+	dma_local_pfn = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT;
+
+	for_each_online_node(nid) {
+		unsigned long start_pfn = node_bdata[nid].node_boot_start >> PAGE_SHIFT;
+		unsigned long end_pfn = node_bdata[nid].node_low_pfn;
+
+		if (dma_local_pfn >= end_pfn - start_pfn)
+			zones_size[ZONE_DMA] = end_pfn - start_pfn;
+		else {
+			zones_size[ZONE_DMA] = dma_local_pfn;
+			zones_size[ZONE_NORMAL] = (end_pfn - start_pfn) - dma_local_pfn;
+		}
+		free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn, NULL);
+	}
+
+	/* Initialize the kernel's ZERO_PGE. */
+	memset((void *)ZERO_PGE, 0, PAGE_SIZE);
+}
+
+void __init mem_init(void)
+{
+	unsigned long codesize, reservedpages, datasize, initsize, pfn;
+	extern int page_is_ram(unsigned long) __init;
+	extern char _text, _etext, _data, _edata;
+	extern char __init_begin, __init_end;
+	unsigned long nid, i;
+	struct page * lmem_map;
+
+	high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
+
+	reservedpages = 0;
+	for_each_online_node(nid) {
+		/*
+		 * This will free up the bootmem, ie, slot 0 memory
+		 */
+		totalram_pages += free_all_bootmem_node(NODE_DATA(nid));
+
+		lmem_map = node_mem_map(nid);
+		pfn = NODE_DATA(nid)->node_start_pfn;
+		for (i = 0; i < node_spanned_pages(nid); i++, pfn++)
+			if (page_is_ram(pfn) && PageReserved(lmem_map+i))
+				reservedpages++;
+	}
+
+	codesize =  (unsigned long) &_etext - (unsigned long) &_text;
+	datasize =  (unsigned long) &_edata - (unsigned long) &_data;
+	initsize =  (unsigned long) &__init_end - (unsigned long) &__init_begin;
+
+	printk("Memory: %luk/%luk available (%luk kernel code, %luk reserved, "
+	       "%luk data, %luk init)\n",
+	       (unsigned long)nr_free_pages() << (PAGE_SHIFT-10),
+	       num_physpages << (PAGE_SHIFT-10),
+	       codesize >> 10,
+	       reservedpages << (PAGE_SHIFT-10),
+	       datasize >> 10,
+	       initsize >> 10);
+#if 0
+	mem_stress();
+#endif
+}
+
+void
+show_mem(void)
+{
+	long i, free = 0, total = 0, reserved = 0;
+	long shared = 0, cached = 0;
+	int nid;
+
+	printk("\nMem-info:\n");
+	show_free_areas();
+	printk("Free swap:       %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
+	for_each_online_node(nid) {
+		struct page * lmem_map = node_mem_map(nid);
+		i = node_spanned_pages(nid);
+		while (i-- > 0) {
+			total++;
+			if (PageReserved(lmem_map+i))
+				reserved++;
+			else if (PageSwapCache(lmem_map+i))
+				cached++;
+			else if (!page_count(lmem_map+i))
+				free++;
+			else
+				shared += page_count(lmem_map + i) - 1;
+		}
+	}
+	printk("%ld pages of RAM\n", total);
+	printk("%ld free pages\n", free);
+	printk("%ld reserved pages\n", reserved);
+	printk("%ld pages shared\n", shared);
+	printk("%ld pages swap cached\n", cached);
+}
diff --git a/arch/alpha/mm/remap.c b/arch/alpha/mm/remap.c
new file mode 100644
index 0000000..19817ad
--- /dev/null
+++ b/arch/alpha/mm/remap.c
@@ -0,0 +1,94 @@
+#include <linux/vmalloc.h>
+#include <asm/pgalloc.h>
+#include <asm/cacheflush.h>
+
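+/* Helpers used by the alpha ioremap() implementations: walk the kernel
+   page tables for [address, address + size) and install PTEs mapping
+   phys_addr with the given protection flags.  */
+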
+/* called with the page_table_lock held */
+static inline void 
+remap_area_pte(pte_t * pte, unsigned long address, unsigned long size, 
+	       unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+	unsigned long pfn;
+
+	address &= ~PMD_MASK;
+	end = address + size;
+	if (end > PMD_SIZE)
+		end = PMD_SIZE;
+	if (address >= end)
+		BUG();
+	pfn = phys_addr >> PAGE_SHIFT;
+	do {
+		if (!pte_none(*pte)) {
+			printk("remap_area_pte: page already exists\n");
+			BUG();
+		}
+		set_pte(pte, pfn_pte(pfn, 
+				     __pgprot(_PAGE_VALID | _PAGE_ASM | 
+				              _PAGE_KRE | _PAGE_KWE | flags)));
+		address += PAGE_SIZE;
+		pfn++;
+		pte++;
+	} while (address && (address < end));
+}
+
+/* called with the page_table_lock held */
+static inline int 
+remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, 
+	       unsigned long phys_addr, unsigned long flags)
+{
+	unsigned long end;
+
+	address &= ~PGDIR_MASK;
+	end = address + size;
+	if (end > PGDIR_SIZE)
+		end = PGDIR_SIZE;
+	phys_addr -= address;
+	if (address >= end)
+		BUG();
+	do {
+		pte_t * pte = pte_alloc_kernel(&init_mm, pmd, address);
+		if (!pte)
+			return -ENOMEM;
+		remap_area_pte(pte, address, end - address, 
+				     address + phys_addr, flags);
+		address = (address + PMD_SIZE) & PMD_MASK;
+		pmd++;
+	} while (address && (address < end));
+	return 0;
+}
+
+int
+__alpha_remap_area_pages(unsigned long address, unsigned long phys_addr,
+			 unsigned long size, unsigned long flags)
+{
+	pgd_t * dir;
+	int error = 0;
+	unsigned long end = address + size;
+
+	phys_addr -= address;
+	dir = pgd_offset(&init_mm, address);
+	flush_cache_all();
+	if (address >= end)
+		BUG();
+	spin_lock(&init_mm.page_table_lock);
+	do {
+		pmd_t *pmd;
+		pmd = pmd_alloc(&init_mm, dir, address);
+		error = -ENOMEM;
+		if (!pmd)
+			break;
+		if (remap_area_pmd(pmd, address, end - address,
+			           phys_addr + address, flags))
+			break;
+		error = 0;
+		address = (address + PGDIR_SIZE) & PGDIR_MASK;
+		dir++;
+	} while (address && (address < end));
+	spin_unlock(&init_mm.page_table_lock);
+	return error;
+}
+