[PATCH] uml: TLB operation batching

This adds VM op batching to skas0.  Rather than having a context switch to and
from the userspace stub for each address space change, we write a number of
operations to the stub data page and invoke a different stub which loops over
them and executes them all in one go.

The operations are stored as [ system call number, arg1, arg2, ... ] tuples.

The set is terminated by a system call number of 0.  Single operations, i.e.
page faults, are still handled in the old way (one stub invocation per
operation), since that is slightly more efficient.

For a kernel build, a minority (~1/4) of the operations were part of a set.
These sets averaged ~100 operations in length, so for this quarter, the
context switching overhead is greatly reduced.

Signed-off-by: Jeff Dike <jdike@addtoit.com>
Cc: Paolo Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
diff --git a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h
index d983ea8..e91064b 100644
--- a/arch/um/kernel/skas/include/skas.h
+++ b/arch/um/kernel/skas/include/skas.h
@@ -24,11 +24,14 @@
 extern void remove_sigstack(void);
 extern void new_thread_handler(int sig);
 extern void handle_syscall(union uml_pt_regs *regs);
-extern int map(struct mm_id * mm_idp, unsigned long virt, unsigned long len,
-               int r, int w, int x, int phys_fd, unsigned long long offset);
-extern int unmap(struct mm_id * mm_idp, void *addr, unsigned long len);
-extern int protect(struct mm_id * mm_idp, unsigned long addr,
-		   unsigned long len, int r, int w, int x);
+extern void *map(struct mm_id * mm_idp, unsigned long virt,
+		 unsigned long len, int r, int w, int x, int phys_fd,
+		 unsigned long long offset, int done, void *data);
+extern void *unmap(struct mm_id * mm_idp, void *addr,
+		   unsigned long len, int done, void *data);
+extern void *protect(struct mm_id * mm_idp, unsigned long addr,
+		     unsigned long len, int r, int w, int x, int done,
+		     void *data);
 extern void user_signal(int sig, union uml_pt_regs *regs, int pid);
 extern int new_mm(int from);
 extern int start_userspace(unsigned long stub_stack);
diff --git a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c
index b0980ff..c976320 100644
--- a/arch/um/kernel/skas/mem_user.c
+++ b/arch/um/kernel/skas/mem_user.c
@@ -25,12 +25,14 @@
 #include "sysdep/stub.h"
 #include "skas.h"
 
-extern unsigned long syscall_stub, __syscall_stub_start;
+extern unsigned long syscall_stub, batch_syscall_stub, __syscall_stub_start;
 
 extern void wait_stub_done(int pid, int sig, char * fname);
 
-static long run_syscall_stub(struct mm_id * mm_idp, int syscall,
-                             unsigned long *args)
+int single_count = 0;
+
+static long one_syscall_stub(struct mm_id * mm_idp, int syscall,
+			     unsigned long *args)
 {
         int n, pid = mm_idp->u.pid;
         unsigned long regs[MAX_REG_NR];
@@ -49,18 +51,80 @@
         regs[REGS_SYSCALL_ARG6] = args[5];
         n = ptrace_setregs(pid, regs);
         if(n < 0){
-                printk("run_syscall_stub : PTRACE_SETREGS failed, "
+		printk("one_syscall_stub : PTRACE_SETREGS failed, "
+		       "errno = %d\n", n);
+		return(n);
+	}
+
+	wait_stub_done(pid, 0, "one_syscall_stub");
+
+	return(*((unsigned long *) mm_idp->stack));
+}
+
+int multi_count = 0;
+int multi_op_count = 0;
+
+static long many_syscall_stub(struct mm_id * mm_idp, int syscall,
+			      unsigned long *args, int done, void **addr_out)
+{
+        unsigned long regs[MAX_REG_NR], *stack;
+        int n, pid = mm_idp->u.pid;
+
+        stack = *addr_out;
+        if(stack == NULL)
+                stack = (unsigned long *) current_stub_stack();
+        *stack++ = syscall;
+        *stack++ = args[0];
+        *stack++ = args[1];
+        *stack++ = args[2];
+        *stack++ = args[3];
+        *stack++ = args[4];
+        *stack++ = args[5];
+        *stack = 0;
+        multi_op_count++;
+
+        if(!done && ((((unsigned long) stack) & ~PAGE_MASK) <
+                     PAGE_SIZE - 8 * sizeof(long))){
+                *addr_out = stack;
+                return 0;
+        }
+
+        multi_count++;
+        get_safe_registers(regs);
+        regs[REGS_IP_INDEX] = UML_CONFIG_STUB_CODE +
+                ((unsigned long) &batch_syscall_stub -
+                 (unsigned long) &__syscall_stub_start);
+        regs[REGS_SP_INDEX] = UML_CONFIG_STUB_DATA;
+
+        n = ptrace_setregs(pid, regs);
+        if(n < 0){
+                printk("many_syscall_stub : PTRACE_SETREGS failed, "
                        "errno = %d\n", n);
                 return(n);
         }
 
-        wait_stub_done(pid, 0, "run_syscall_stub");
+        wait_stub_done(pid, 0, "many_syscall_stub");
+        stack = (unsigned long *) mm_idp->stack;
 
-        return(*((unsigned long *) mm_idp->stack));
+        *addr_out = stack;
+        return(*stack);
 }
 
-int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len,
-        int r, int w, int x, int phys_fd, unsigned long long offset)
+static long run_syscall_stub(struct mm_id * mm_idp, int syscall,
+                             unsigned long *args, void **addr, int done)
+{
+        long res;
+
+        if((*addr == NULL) && done)
+                res = one_syscall_stub(mm_idp, syscall, args);
+        else res = many_syscall_stub(mm_idp, syscall, args, done, addr);
+
+        return res;
+}
+
+void *map(struct mm_id * mm_idp, unsigned long virt, unsigned long len,
+          int r, int w, int x, int phys_fd, unsigned long long offset,
+          int done, void *data)
 {
         int prot, n;
 
@@ -70,6 +134,7 @@
         if(proc_mm){
                 struct proc_mm_op map;
                 int fd = mm_idp->u.mm_fd;
+
                 map = ((struct proc_mm_op) { .op	= MM_MMAP,
                                              .u		=
                                              { .mmap	=
@@ -91,21 +156,24 @@
                                          MAP_SHARED | MAP_FIXED, phys_fd,
                                          MMAP_OFFSET(offset) };
 
-                res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args);
+		res = run_syscall_stub(mm_idp, STUB_MMAP_NR, args,
+				       &data, done);
                 if((void *) res == MAP_FAILED)
                         printk("mmap stub failed, errno = %d\n", res);
         }
 
-        return 0;
+	return data;
 }
 
-int unmap(struct mm_id *mm_idp, void *addr, unsigned long len)
+void *unmap(struct mm_id * mm_idp, void *addr, unsigned long len, int done,
+            void *data)
 {
         int n;
 
         if(proc_mm){
                 struct proc_mm_op unmap;
                 int fd = mm_idp->u.mm_fd;
+
                 unmap = ((struct proc_mm_op) { .op	= MM_MUNMAP,
                                                .u	=
                                                { .munmap	=
@@ -113,28 +181,25 @@
                                                    (unsigned long) addr,
                                                    .len		= len } } } );
                 n = os_write_file(fd, &unmap, sizeof(unmap));
-                if(n != sizeof(unmap)) {
-                        if(n < 0)
-                                return(n);
-                        else if(n > 0)
-                                return(-EIO);
-                }
+		if(n != sizeof(unmap))
+		  printk("unmap - proc_mm write returned %d\n", n);
         }
         else {
                 int res;
                 unsigned long args[] = { (unsigned long) addr, len, 0, 0, 0,
                                          0 };
 
-                res = run_syscall_stub(mm_idp, __NR_munmap, args);
+		res = run_syscall_stub(mm_idp, __NR_munmap, args,
+				       &data, done);
                 if(res < 0)
                         printk("munmap stub failed, errno = %d\n", res);
         }
 
-        return(0);
+        return data;
 }
 
-int protect(struct mm_id *mm_idp, unsigned long addr, unsigned long len,
-	    int r, int w, int x)
+void *protect(struct mm_id * mm_idp, unsigned long addr, unsigned long len,
+              int r, int w, int x, int done, void *data)
 {
         struct proc_mm_op protect;
         int prot, n;
@@ -160,12 +225,13 @@
                 int res;
                 unsigned long args[] = { addr, len, prot, 0, 0, 0 };
 
-                res = run_syscall_stub(mm_idp, __NR_mprotect, args);
+                res = run_syscall_stub(mm_idp, __NR_mprotect, args,
+                                       &data, done);
                 if(res < 0)
                         panic("mprotect stub failed, errno = %d\n", res);
         }
 
-        return(0);
+        return data;
 }
 
 void before_mem_skas(unsigned long unused)
diff --git a/arch/um/kernel/skas/tlb.c b/arch/um/kernel/skas/tlb.c
index 6230999..4b5fd20 100644
--- a/arch/um/kernel/skas/tlb.c
+++ b/arch/um/kernel/skas/tlb.c
@@ -18,7 +18,8 @@
 #include "os.h"
 #include "tlb.h"
 
-static void do_ops(union mm_context *mmu, struct host_vm_op *ops, int last)
+static void *do_ops(union mm_context *mmu, struct host_vm_op *ops, int last,
+		    int finished, void *flush)
 {
 	struct host_vm_op *op;
 	int i;
@@ -27,24 +28,28 @@
 		op = &ops[i];
 		switch(op->type){
 		case MMAP:
-                        map(&mmu->skas.id, op->u.mmap.addr, op->u.mmap.len,
-			    op->u.mmap.r, op->u.mmap.w, op->u.mmap.x,
-			    op->u.mmap.fd, op->u.mmap.offset);
+			flush = map(&mmu->skas.id, op->u.mmap.addr,
+				    op->u.mmap.len, op->u.mmap.r, op->u.mmap.w,
+				    op->u.mmap.x, op->u.mmap.fd,
+				    op->u.mmap.offset, finished, flush);
 			break;
 		case MUNMAP:
-                        unmap(&mmu->skas.id, (void *) op->u.munmap.addr,
-			      op->u.munmap.len);
+			flush = unmap(&mmu->skas.id, (void *) op->u.munmap.addr,
+				      op->u.munmap.len, finished, flush);
 			break;
 		case MPROTECT:
-                        protect(&mmu->skas.id, op->u.mprotect.addr,
-                                op->u.mprotect.len, op->u.mprotect.r,
-                                op->u.mprotect.w, op->u.mprotect.x);
+			flush = protect(&mmu->skas.id, op->u.mprotect.addr,
+					op->u.mprotect.len, op->u.mprotect.r,
+					op->u.mprotect.w, op->u.mprotect.x,
+					finished, flush);
 			break;
 		default:
 			printk("Unknown op type %d in do_ops\n", op->type);
 			break;
 		}
 	}
+
+	return flush;
 }
 
 extern int proc_mm;