uml: start fixing os_read_file and os_write_file
This patch starts the removal of a very old, very broken piece of code. This
stems from the problem of passing a userspace buffer into read() or write() on
the host. If that buffer had not yet been faulted in, read and write will
return -EFAULT.
To avoid this problem, the solution was to fault the buffer in before the
system call by touching the pages that hold the buffer by doing a copy-user of
a byte to each page. This is obviously bogus, but it does usually work, in tt
mode, since the kernel and process are in the same address space and userspace
addresses can be accessed directly in the kernel.
In skas mode, where the kernel and process are in separate address spaces, it
is completely bogus because the userspace address, which is invalid in the
kernel, is passed into the system call instead of the corresponding physical
address, which would be valid. Here, it appears that this code, on every host
read() or write(), tries to fault in a random process page. This doesn't seem
to cause any correctness problems, but there is a performance impact. This
patch, and the ones following, result in a 10-15% performance gain on a kernel
build.
This code can't be immediately tossed out because when it is, you can't log
in. Apparently, there is some code in the console driver which depends on
this somehow.
However, we can start removing it by switching the code which does I/O using
kernel addresses to using plain read() and write(). This patch introduces
os_read_file_k and os_write_file_k for use with kernel buffers and converts
all call locations which use obvious kernel buffers to use them. These
include I/O using buffers which are local variables which are on the stack or
kmalloc-ed. Later patches will handle the less obvious cases, followed by a
mass conversion back to the original interface.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
diff --git a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
index ee53cf8..d226f10 100644
--- a/arch/um/drivers/chan_user.c
+++ b/arch/um/drivers/chan_user.c
@@ -85,7 +85,7 @@
pty_fd = data->pty_fd;
pipe_fd = data->pipe_fd;
- count = os_write_file(pipe_fd, &c, sizeof(c));
+ count = os_write_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : failed to write synchronization "
"byte, err = %d\n", -count);
@@ -120,7 +120,7 @@
* host - since they are not different kernel threads, we cannot use
* kernel semaphores. We don't use SysV semaphores because they are
* persistent. */
- count = os_read_file(pipe_fd, &c, sizeof(c));
+ count = os_read_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : failed to read synchronization byte, "
"err = %d\n", -count);
@@ -130,7 +130,7 @@
* are blocked.*/
sigsuspend(&sigs);
- count = os_write_file(pipe_fd, &c, sizeof(c));
+ count = os_write_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : write failed, err = %d\n",
-count);
@@ -162,7 +162,7 @@
}
*fd_out = fds[0];
- n = os_read_file(fds[0], &c, sizeof(c));
+ n = os_read_file_k(fds[0], &c, sizeof(c));
if(n != sizeof(c)){
printk("winch_tramp : failed to read synchronization byte\n");
printk("read failed, err = %d\n", -n);
@@ -195,7 +195,7 @@
if(thread > 0){
register_winch_irq(thread_fd, fd, thread, tty);
- count = os_write_file(thread_fd, &c, sizeof(c));
+ count = os_write_file_k(thread_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("register_winch : failed to write "
"synchronization byte, err = %d\n",
diff --git a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
index b869e38..e1fd26c 100644
--- a/arch/um/drivers/daemon_user.c
+++ b/arch/um/drivers/daemon_user.c
@@ -94,7 +94,7 @@
req.version = SWITCH_VERSION;
req.type = REQ_NEW_CONTROL;
req.sock = *local_addr;
- n = os_write_file(pri->control, &req, sizeof(req));
+ n = os_write_file_k(pri->control, &req, sizeof(req));
if(n != sizeof(req)){
printk("daemon_open : control setup request failed, err = %d\n",
-n);
@@ -102,7 +102,7 @@
goto out_free;
}
- n = os_read_file(pri->control, sun, sizeof(*sun));
+ n = os_read_file_k(pri->control, sun, sizeof(*sun));
if(n != sizeof(*sun)){
printk("daemon_open : read of data socket failed, err = %d\n",
-n);
diff --git a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
index 5eeecf89..0fbb161 100644
--- a/arch/um/drivers/harddog_user.c
+++ b/arch/um/drivers/harddog_user.c
@@ -79,7 +79,7 @@
goto out_close_out;
}
- n = os_read_file(in_fds[0], &c, sizeof(c));
+ n = os_read_file_k(in_fds[0], &c, sizeof(c));
if(n == 0){
printk("harddog_open - EOF on watchdog pipe\n");
helper_wait(pid);
@@ -118,7 +118,7 @@
int n;
char c = '\n';
- n = os_write_file(fd, &c, sizeof(c));
+ n = os_write_file_k(fd, &c, sizeof(c));
if(n != sizeof(c)){
printk("ping_watchdog - write failed, err = %d\n", -n);
if(n < 0)
diff --git a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
index 10e08a8..bd6688e 100644
--- a/arch/um/drivers/hostaudio_kern.c
+++ b/arch/um/drivers/hostaudio_kern.c
@@ -84,7 +84,7 @@
if(kbuf == NULL)
return(-ENOMEM);
- err = os_read_file(state->fd, kbuf, count);
+ err = os_read_file_k(state->fd, kbuf, count);
if(err < 0)
goto out;
@@ -115,7 +115,7 @@
if(copy_from_user(kbuf, buffer, count))
goto out;
- err = os_write_file(state->fd, kbuf, count);
+ err = os_write_file_k(state->fd, kbuf, count);
if(err < 0)
goto out;
*ppos += err;
diff --git a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
index 3503cff..2dc57a3 100644
--- a/arch/um/drivers/net_user.c
+++ b/arch/um/drivers/net_user.c
@@ -63,7 +63,7 @@
}
*output = '\0';
- ret = os_read_file(fd, &remain, sizeof(remain));
+ ret = os_read_file_k(fd, &remain, sizeof(remain));
if (ret != sizeof(remain)) {
expected = sizeof(remain);
diff --git a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
index 1c8efd9..75bb401 100644
--- a/arch/um/drivers/port_kern.c
+++ b/arch/um/drivers/port_kern.c
@@ -113,7 +113,7 @@
}
if(atomic_read(&port->wait_count) == 0){
- os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
+ os_write_file_k(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
printk("No one waiting for port\n");
}
list_add(&conn->list, &port->pending);
diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c
index e942e83..94838f4 100644
--- a/arch/um/drivers/random.c
+++ b/arch/um/drivers/random.c
@@ -44,7 +44,7 @@
int n, ret = 0, have_data;
while(size){
- n = os_read_file(random_fd, &data, sizeof(data));
+ n = os_read_file_k(random_fd, &data, sizeof(data));
if(n > 0){
have_data = n;
while (have_data && size) {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 83189e1..6d163c9 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -504,7 +504,7 @@
struct ubd *dev;
int n;
- n = os_read_file(thread_fd, &req, sizeof(req));
+ n = os_read_file_k(thread_fd, &req, sizeof(req));
if(n != sizeof(req)){
printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
"err = %d\n", os_getpid(), -n);
@@ -1092,8 +1092,7 @@
err = prepare_request(req, &io_req);
if(!err){
dev->active = 1;
- n = os_write_file(thread_fd, (char *) &io_req,
- sizeof(io_req));
+ n = os_write_file_k(thread_fd, &io_req, sizeof(io_req));
if(n != sizeof(io_req))
printk("write to io thread failed, "
"errno = %d\n", -n);
@@ -1336,8 +1335,8 @@
return(1);
}
- n = os_write_file(req->fds[1], &req->bitmap_words,
- sizeof(req->bitmap_words));
+ n = os_write_file_k(req->fds[1], &req->bitmap_words,
+ sizeof(req->bitmap_words));
if(n != sizeof(req->bitmap_words)){
printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
req->fds[1]);
@@ -1381,7 +1380,7 @@
do {
buf = &buf[n];
len -= n;
- n = os_read_file(req->fds[bit], buf, len);
+ n = os_read_file_k(req->fds[bit], buf, len);
if (n < 0) {
printk("do_io - read failed, err = %d "
"fd = %d\n", -n, req->fds[bit]);
@@ -1391,7 +1390,7 @@
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
} else {
- n = os_write_file(req->fds[bit], buf, len);
+ n = os_write_file_k(req->fds[bit], buf, len);
if(n != len){
printk("do_io - write failed err = %d "
"fd = %d\n", -n, req->fds[bit]);
@@ -1421,7 +1420,7 @@
ignore_sigwinch_sig();
while(1){
- n = os_read_file(kernel_fd, &req, sizeof(req));
+ n = os_read_file_k(kernel_fd, &req, sizeof(req));
if(n != sizeof(req)){
if(n < 0)
printk("io_thread - read failed, fd = %d, "
@@ -1434,7 +1433,7 @@
}
io_count++;
do_io(&req);
- n = os_write_file(kernel_fd, &req, sizeof(req));
+ n = os_write_file_k(kernel_fd, &req, sizeof(req));
if(n != sizeof(req))
printk("io_thread - write failed, fd = %d, err = %d\n",
kernel_fd, -n);