uml: runtime host VMSPLIT detection

Calculate TASK_SIZE at run-time by figuring out the host's VMSPLIT - this is
needed on i386 if UML is to run on hosts with varying VMSPLITs without
recompilation.

TASK_SIZE is now defined in terms of a variable, task_size.  This gets rid of
an include of pgtable.h from processor.h, which can cause include loops.

On i386, task_size is calculated early in boot by probing the address space in
a binary search to figure out where the boundary between usable and non-usable
memory is.  This tries to make sure that a page that is considered to be in
userspace is, or can be made, read-write.  I'm concerned about a system-global
VDSO page in kernel memory being hit and considered to be a userspace page.

On x86_64, task_size is just the old value of CONFIG_TOP_ADDR.

A bunch of config variable are gone now.  CONFIG_TOP_ADDR is directly replaced
by TASK_SIZE.  NEST_LEVEL is gone since the relocation of the stubs makes it
irrelevant.  All the HOST_VMSPLIT stuff is gone.  All references to these in
arch/um/Makefile are also gone.

I noticed and fixed a missing extern in os.h when adding os_get_task_size.

Note: This has been revised to fix the 32-bit UML on 64-bit host bug that
Miklos ran into.

Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/um/os-Linux/sys-i386/Makefile b/arch/um/os-Linux/sys-i386/Makefile
index a841262..b4bc6ac 100644
--- a/arch/um/os-Linux/sys-i386/Makefile
+++ b/arch/um/os-Linux/sys-i386/Makefile
@@ -3,7 +3,7 @@
 # Licensed under the GPL
 #
 
-obj-y = registers.o signal.o tls.o
+obj-y = registers.o signal.o task_size.o tls.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/um/os-Linux/sys-i386/task_size.c b/arch/um/os-Linux/sys-i386/task_size.c
new file mode 100644
index 0000000..48d211b
--- /dev/null
+++ b/arch/um/os-Linux/sys-i386/task_size.c
@@ -0,0 +1,120 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include "longjmp.h"
+#include "kern_constants.h"
+
+static jmp_buf buf;
+
+static void segfault(int sig)
+{
+	longjmp(buf, 1);
+}
+
+static int page_ok(unsigned long page)
+{
+	unsigned long *address = (unsigned long *) (page << UM_KERN_PAGE_SHIFT);
+	unsigned long n = ~0UL;
+	void *mapped = NULL;
+	int ok = 0;
+
+	/*
+	 * First see if the page is readable.  If it is, it may still
+	 * be a VDSO, so we go on to see if it's writable.  If not
+	 * then try mapping memory there.  If that fails, then we're
+	 * still in the kernel area.  As a sanity check, we'll fail if
+	 * the mmap succeeds, but gives us an address different from
+	 * what we wanted.
+	 */
+	if (setjmp(buf) == 0)
+		n = *address;
+	else {
+		mapped = mmap(address, UM_KERN_PAGE_SIZE,
+			      PROT_READ | PROT_WRITE,
+			      MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (mapped == MAP_FAILED)
+			return 0;
+		if (mapped != address)
+			goto out;
+	}
+
+	/*
+	 * Now, is it writeable?  If so, then we're in user address
+	 * space.  If not, then try mprotecting it and try the write
+	 * again.
+	 */
+	if (setjmp(buf) == 0) {
+		*address = n;
+		ok = 1;
+		goto out;
+	} else if (mprotect(address, UM_KERN_PAGE_SIZE,
+			    PROT_READ | PROT_WRITE) != 0)
+		goto out;
+
+	if (setjmp(buf) == 0) {
+		*address = n;
+		ok = 1;
+	}
+
+ out:
+	if (mapped != NULL)
+		munmap(mapped, UM_KERN_PAGE_SIZE);
+	return ok;
+}
+
+unsigned long os_get_task_size(void)
+{
+	struct sigaction sa, old;
+	unsigned long bottom = 0;
+	/*
+	 * A 32-bit UML on a 64-bit host gets confused about the VDSO at
+	 * 0xffffe000.  It is mapped, is readable, can be reprotected writeable
+	 * and written.  However, exec discovers later that it can't be
+	 * unmapped.  So, just set the highest address to be checked to just
+	 * below it.  This might waste some address space on 4G/4G 32-bit
+	 * hosts, but shouldn't hurt otherwise.
+	 */
+	unsigned long top = 0xffffd000 >> UM_KERN_PAGE_SHIFT;
+	unsigned long test;
+
+	printf("Locating the top of the address space ... ");
+	fflush(stdout);
+
+	/*
+	 * We're going to be longjmping out of the signal handler, so
+	 * SA_DEFER needs to be set.
+	 */
+	sa.sa_handler = segfault;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = SA_NODEFER;
+	sigaction(SIGSEGV, &sa, &old);
+
+	if (!page_ok(bottom)) {
+		fprintf(stderr, "Address 0x%x no good?\n",
+			bottom << UM_KERN_PAGE_SHIFT);
+		exit(1);
+	}
+
+	/* This could happen with a 4G/4G split */
+	if (page_ok(top))
+		goto out;
+
+	do {
+		test = bottom + (top - bottom) / 2;
+		if (page_ok(test))
+			bottom = test;
+		else
+			top = test;
+	} while (top - bottom > 1);
+
+out:
+	/* Restore the old SIGSEGV handling */
+	sigaction(SIGSEGV, &old, NULL);
+
+	top <<= UM_KERN_PAGE_SHIFT;
+	printf("0x%x\n", top);
+	fflush(stdout);
+
+	return top;
+}
diff --git a/arch/um/os-Linux/sys-x86_64/Makefile b/arch/um/os-Linux/sys-x86_64/Makefile
index a42a4ef..a44a47f 100644
--- a/arch/um/os-Linux/sys-x86_64/Makefile
+++ b/arch/um/os-Linux/sys-x86_64/Makefile
@@ -3,7 +3,7 @@
 # Licensed under the GPL
 #
 
-obj-y = registers.o prctl.o signal.o
+obj-y = registers.o prctl.o signal.o task_size.o
 
 USER_OBJS := $(obj-y)
 
diff --git a/arch/um/os-Linux/sys-x86_64/task_size.c b/arch/um/os-Linux/sys-x86_64/task_size.c
new file mode 100644
index 0000000..fad6f57
--- /dev/null
+++ b/arch/um/os-Linux/sys-x86_64/task_size.c
@@ -0,0 +1,5 @@
+unsigned long os_get_task_size(unsigned long shift)
+{
+	/* The old value of CONFIG_TOP_ADDR */
+	return 0x7fc0000000;
+}