blob: 59348359f9ab3bd0bdf9b469ce850937e0052c36 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
Jeff Dikeda3e30e2007-07-23 18:43:47 -070017#include "kern_constants.h"
Jeff Dike75e55842005-09-03 15:57:45 -070018
Jeff Dike91acb212005-10-10 23:10:32 -040019struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080020 enum aio_type type;
21 int io_fd;
22 unsigned long long offset;
23 char *buf;
24 int len;
25 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040026};
27
Jeff Dike75e55842005-09-03 15:57:45 -070028#if defined(HAVE_AIO_ABI)
29#include <linux/aio_abi.h>
30
31/* If we have the headers, we are going to build with AIO enabled.
32 * If we don't have aio in libc, we define the necessary stubs here.
33 */
34
35#if !defined(HAVE_AIO_LIBC)
36
/*
 * Stand-in for the libc io_setup() wrapper: invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
41
/*
 * Stand-in for the libc io_submit() wrapper: invokes the host system call
 * directly and hands back whatever syscall() returned.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
46
/*
 * Stand-in for the libc io_getevents() wrapper: invokes the host system
 * call directly and hands back whatever syscall() returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
			   timeout);

	return ret;
}
52
53#endif
54
55/* The AIO_MMAP cases force the mmapped page into memory here
56 * rather than in whatever place first touches the data. I used
57 * to do this by touching the page, but that's delicate because
58 * gcc is prone to optimizing that away. So, what's done here
59 * is we read from the descriptor from which the page was
60 * mapped. The caller is required to pass an offset which is
61 * inside the page that was mapped. Thus, when the read
62 * returns, we know that the page is in the page cache, and
63 * that it now backs the mmapped area.
64 */
65
Jeff Dike91acb212005-10-10 23:10:32 -040066static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080067 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070068{
Jeff Dikeda3e30e2007-07-23 18:43:47 -070069 struct iocb *iocbp = & ((struct iocb) {
70 .aio_data = (unsigned long) aio,
71 .aio_fildes = fd,
72 .aio_buf = (unsigned long) buf,
73 .aio_nbytes = len,
74 .aio_offset = offset
75 });
Jeff Diked50084a2006-01-06 00:18:50 -080076 char c;
Jeff Dike75e55842005-09-03 15:57:45 -070077
Jeff Dikeda3e30e2007-07-23 18:43:47 -070078 switch (type) {
Jeff Diked50084a2006-01-06 00:18:50 -080079 case AIO_READ:
Jeff Dikeda3e30e2007-07-23 18:43:47 -070080 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
Jeff Diked50084a2006-01-06 00:18:50 -080081 break;
82 case AIO_WRITE:
Jeff Dikeda3e30e2007-07-23 18:43:47 -070083 iocbp->aio_lio_opcode = IOCB_CMD_PWRITE;
Jeff Diked50084a2006-01-06 00:18:50 -080084 break;
85 case AIO_MMAP:
Jeff Dikeda3e30e2007-07-23 18:43:47 -070086 iocbp->aio_lio_opcode = IOCB_CMD_PREAD;
87 iocbp->aio_buf = (unsigned long) &c;
88 iocbp->aio_nbytes = sizeof(c);
Jeff Diked50084a2006-01-06 00:18:50 -080089 break;
90 default:
Jeff Dikeda3e30e2007-07-23 18:43:47 -070091 printk(UM_KERN_ERR "Bogus op in do_aio - %d\n", type);
92 return -EINVAL;
Jeff Diked50084a2006-01-06 00:18:50 -080093 }
Jeff Dike09ace812005-09-03 15:57:46 -070094
Jeff Dikeda3e30e2007-07-23 18:43:47 -070095 return (io_submit(ctx, 1, &iocbp) > 0) ? 0 : -errno;
Jeff Dike75e55842005-09-03 15:57:45 -070096}
97
/*
 * Host AIO context shared by the submission path and aio_thread.
 * Starts out as zero, is filled in by io_setup() from an initcall and is
 * not changed after that.
 */
static aio_context_t ctx;
100
101static int aio_thread(void *arg)
102{
Jeff Diked50084a2006-01-06 00:18:50 -0800103 struct aio_thread_reply reply;
104 struct io_event event;
105 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700106
Jeff Diked50084a2006-01-06 00:18:50 -0800107 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700108
Jeff Diked50084a2006-01-06 00:18:50 -0800109 while(1){
110 n = io_getevents(ctx, 1, 1, &event, NULL);
111 if(n < 0){
112 if(errno == EINTR)
113 continue;
114 printk("aio_thread - io_getevents failed, "
115 "errno = %d\n", errno);
116 }
117 else {
118 reply = ((struct aio_thread_reply)
119 { .data = (void *) (long) event.data,
120 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400121 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
Jeff Dikea61f3342007-05-06 14:51:35 -0700122 err = write(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800123 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400124 printk("aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700125 "err = %d\n", reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800126 }
127 }
128 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700129}
130
131#endif
132
Jeff Dike91acb212005-10-10 23:10:32 -0400133static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700134{
Jeff Diked50084a2006-01-06 00:18:50 -0800135 char c;
Jeff Dikeef0470c2007-05-06 14:51:33 -0700136 unsigned long long actual;
Jeff Dikea61f3342007-05-06 14:51:35 -0700137 int n;
Jeff Dike75e55842005-09-03 15:57:45 -0700138
Jeff Dikeef0470c2007-05-06 14:51:33 -0700139 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
140 if(actual != req->offset)
141 return -errno;
142
Jeff Diked50084a2006-01-06 00:18:50 -0800143 switch(req->type){
144 case AIO_READ:
Jeff Dikea61f3342007-05-06 14:51:35 -0700145 n = read(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800146 break;
147 case AIO_WRITE:
Jeff Dikea61f3342007-05-06 14:51:35 -0700148 n = write(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800149 break;
150 case AIO_MMAP:
Jeff Dikea61f3342007-05-06 14:51:35 -0700151 n = read(req->io_fd, &c, sizeof(c));
Jeff Diked50084a2006-01-06 00:18:50 -0800152 break;
153 default:
154 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dikea61f3342007-05-06 14:51:35 -0700155 return -EINVAL;
Jeff Diked50084a2006-01-06 00:18:50 -0800156 }
Jeff Dike75e55842005-09-03 15:57:45 -0700157
Jeff Dikea61f3342007-05-06 14:51:35 -0700158 if(n < 0)
159 return -errno;
160 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700161}
162
/* All of these are set up from initcalls and left alone afterwards */

/* End of the request channel that not_aio_thread reads requests from */
static int aio_req_fd_r = -1;
/* End of the request channel that submit_aio_24 writes requests to */
static int aio_req_fd_w = -1;
/* Pid of the helper thread, or -1 when none was started */
static int aio_pid = -1;
/* Stack handed to the helper thread; released in exit_aio */
static unsigned long aio_stack;
Jeff Dike9683da92007-02-10 01:44:27 -0800168
Jeff Dike75e55842005-09-03 15:57:45 -0700169static int not_aio_thread(void *arg)
170{
Jeff Diked50084a2006-01-06 00:18:50 -0800171 struct aio_thread_req req;
172 struct aio_thread_reply reply;
173 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700174
Jeff Diked50084a2006-01-06 00:18:50 -0800175 signal(SIGWINCH, SIG_IGN);
176 while(1){
Jeff Dikea61f3342007-05-06 14:51:35 -0700177 err = read(aio_req_fd_r, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800178 if(err != sizeof(req)){
179 if(err < 0)
180 printk("not_aio_thread - read failed, "
181 "fd = %d, err = %d\n", aio_req_fd_r,
Jeff Dikea61f3342007-05-06 14:51:35 -0700182 errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800183 else {
184 printk("not_aio_thread - short read, fd = %d, "
185 "length = %d\n", aio_req_fd_r, err);
186 }
187 continue;
188 }
189 err = do_not_aio(&req);
190 reply = ((struct aio_thread_reply) { .data = req.aio,
Jeff Dikeef0470c2007-05-06 14:51:33 -0700191 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700192 err = write(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800193 if(err != sizeof(reply))
194 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700195 "err = %d\n", req.aio->reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800196 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800197
198 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700199}
200
Jeff Dike75e55842005-09-03 15:57:45 -0700201static int init_aio_24(void)
202{
Jeff Diked50084a2006-01-06 00:18:50 -0800203 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700204
Jeff Diked50084a2006-01-06 00:18:50 -0800205 err = os_pipe(fds, 1, 1);
206 if(err)
207 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700208
Jeff Diked50084a2006-01-06 00:18:50 -0800209 aio_req_fd_w = fds[0];
210 aio_req_fd_r = fds[1];
Jeff Dike8603ec82007-05-06 14:51:44 -0700211
212 err = os_set_fd_block(aio_req_fd_w, 0);
213 if(err)
214 goto out_close_pipe;
215
Jeff Diked50084a2006-01-06 00:18:50 -0800216 err = run_helper_thread(not_aio_thread, NULL,
Jeff Dikec4399012007-07-15 23:38:56 -0700217 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
Jeff Diked50084a2006-01-06 00:18:50 -0800218 if(err < 0)
219 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700220
Jeff Diked50084a2006-01-06 00:18:50 -0800221 aio_pid = err;
222 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700223
Jeff Diked50084a2006-01-06 00:18:50 -0800224out_close_pipe:
225 os_close_file(fds[0]);
226 os_close_file(fds[1]);
227 aio_req_fd_w = -1;
228 aio_req_fd_r = -1;
229out:
Jeff Dike75e55842005-09-03 15:57:45 -0700230#ifndef HAVE_AIO_ABI
231 printk("/usr/include/linux/aio_abi.h not present during build\n");
232#endif
233 printk("2.6 host AIO support not used - falling back to I/O "
234 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800235 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700236}
237
238#ifdef HAVE_AIO_ABI
239#define DEFAULT_24_AIO 0
240static int init_aio_26(void)
241{
Jeff Diked50084a2006-01-06 00:18:50 -0800242 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700243
Jeff Diked50084a2006-01-06 00:18:50 -0800244 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700245 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800246 printk("aio_thread failed to initialize context, err = %d\n",
247 errno);
248 return err;
249 }
Jeff Dike75e55842005-09-03 15:57:45 -0700250
Jeff Diked50084a2006-01-06 00:18:50 -0800251 err = run_helper_thread(aio_thread, NULL,
Jeff Dikec4399012007-07-15 23:38:56 -0700252 CLONE_FILES | CLONE_VM | SIGCHLD, &aio_stack);
Jeff Diked50084a2006-01-06 00:18:50 -0800253 if(err < 0)
254 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700255
Jeff Diked50084a2006-01-06 00:18:50 -0800256 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700257
258 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800259 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700260}
261
Jeff Dike91acb212005-10-10 23:10:32 -0400262static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
263 unsigned long long offset, struct aio_context *aio)
264{
Jeff Diked50084a2006-01-06 00:18:50 -0800265 struct aio_thread_reply reply;
266 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400267
Jeff Diked50084a2006-01-06 00:18:50 -0800268 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
269 if(err){
270 reply = ((struct aio_thread_reply) { .data = aio,
271 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700272 err = write(aio->reply_fd, &reply, sizeof(reply));
273 if(err != sizeof(reply)){
274 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800275 printk("submit_aio_26 - write failed, "
276 "fd = %d, err = %d\n", aio->reply_fd, -err);
Jeff Dikea61f3342007-05-06 14:51:35 -0700277 }
Jeff Diked50084a2006-01-06 00:18:50 -0800278 else err = 0;
279 }
Jeff Dike91acb212005-10-10 23:10:32 -0400280
Jeff Diked50084a2006-01-06 00:18:50 -0800281 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400282}
283
Jeff Dike75e55842005-09-03 15:57:45 -0700284#else
285#define DEFAULT_24_AIO 1
/*
 * Built without aio_abi.h: host AIO can never be set up, so always
 * report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
290
Jeff Dike91acb212005-10-10 23:10:32 -0400291static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
292 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700293{
Jeff Diked50084a2006-01-06 00:18:50 -0800294 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700295}
296#endif
297
Jeff Dike9683da92007-02-10 01:44:27 -0800298/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700299static int aio_24 = DEFAULT_24_AIO;
300
301static int __init set_aio_24(char *name, int *add)
302{
Jeff Diked50084a2006-01-06 00:18:50 -0800303 aio_24 = 1;
304 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700305}
306
307__uml_setup("aio=2.4", set_aio_24,
308"aio=2.4\n"
309" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
310" available. 2.4 AIO is a single thread that handles one request at a\n"
311" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
312" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
313" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
314" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
315" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
316" your /usr/include/linux in order to build an AIO-capable UML\n\n"
317);
318
319static int init_aio(void)
320{
Jeff Diked50084a2006-01-06 00:18:50 -0800321 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700322
Jeff Diked50084a2006-01-06 00:18:50 -0800323 CHOOSE_MODE(({ if(!aio_24){
324 printk("Disabling 2.6 AIO in tt mode\n");
325 aio_24 = 1;
326 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700327
Jeff Diked50084a2006-01-06 00:18:50 -0800328 if(!aio_24){
329 err = init_aio_26();
330 if(err && (errno == ENOSYS)){
331 printk("2.6 AIO not supported on the host - "
332 "reverting to 2.4 AIO\n");
333 aio_24 = 1;
334 }
335 else return err;
336 }
Jeff Dike75e55842005-09-03 15:57:45 -0700337
Jeff Diked50084a2006-01-06 00:18:50 -0800338 if(aio_24)
339 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700340
Jeff Diked50084a2006-01-06 00:18:50 -0800341 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700342}
343
344/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
345 * needs to be called when the kernel is running because it calls run_helper,
346 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
347 * kernel does not run __exitcalls on shutdown, and can't because many of them
348 * break when called outside of module unloading.
349 */
350__initcall(init_aio);
351
352static void exit_aio(void)
353{
Jeff Dikec4399012007-07-15 23:38:56 -0700354 if (aio_pid != -1) {
Jeff Diked50084a2006-01-06 00:18:50 -0800355 os_kill_process(aio_pid, 1);
Jeff Dikec4399012007-07-15 23:38:56 -0700356 free_stack(aio_stack, 0);
357 }
Jeff Dike75e55842005-09-03 15:57:45 -0700358}
359
360__uml_exitcall(exit_aio);
361
Jeff Dike91acb212005-10-10 23:10:32 -0400362static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
363 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700364{
Jeff Diked50084a2006-01-06 00:18:50 -0800365 struct aio_thread_req req = { .type = type,
366 .io_fd = io_fd,
367 .offset = offset,
368 .buf = buf,
369 .len = len,
370 .aio = aio,
371 };
372 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400373
Jeff Dikea61f3342007-05-06 14:51:35 -0700374 err = write(aio_req_fd_w, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800375 if(err == sizeof(req))
376 err = 0;
Jeff Dikea61f3342007-05-06 14:51:35 -0700377 else err = -errno;
Jeff Dike91acb212005-10-10 23:10:32 -0400378
Jeff Diked50084a2006-01-06 00:18:50 -0800379 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400380}
381
382int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800383 unsigned long long offset, int reply_fd,
384 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400385{
Jeff Diked50084a2006-01-06 00:18:50 -0800386 aio->reply_fd = reply_fd;
387 if(aio_24)
388 return submit_aio_24(type, io_fd, buf, len, offset, aio);
389 else {
390 return submit_aio_26(type, io_fd, buf, len, offset, aio);
391 }
Jeff Dike75e55842005-09-03 15:57:45 -0700392}