/*
 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
 * Licensed under the GPL
 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027#if defined(HAVE_AIO_ABI)
28#include <linux/aio_abi.h>
29
/* If we have the headers, we are going to build with AIO enabled.
 * If we don't have aio in libc, we define the necessary stubs here.
 */
33
34#if !defined(HAVE_AIO_LIBC)
35
/*
 * Stand-in for the libc wrapper: issues the io_setup system call directly.
 * n and ctxp are passed through untouched; the result of syscall() is
 * returned as is.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
40
/*
 * Stand-in for the libc wrapper: issues the io_submit system call directly.
 * ctx, nr and iocbpp are passed through untouched; the result of syscall()
 * is returned as is.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
45
/*
 * Stand-in for the libc wrapper: issues the io_getevents system call
 * directly.  All arguments are passed through untouched; the result of
 * syscall() is returned as is.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
51
52#endif
53
/* The AIO_MMAP cases force the mmapped page into memory here
 * rather than in whatever place first touches the data.  I used
 * to do this by touching the page, but that's delicate because
 * gcc is prone to optimizing that away.  So, what's done here
 * is we read from the descriptor from which the page was
 * mapped.  The caller is required to pass an offset which is
 * inside the page that was mapped.  Thus, when the read
 * returns, we know that the page is in the page cache, and
 * that it now backs the mmapped area.
 */
64
Jeff Dike91acb212005-10-10 23:10:32 -040065static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080066 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070067{
Jeff Diked50084a2006-01-06 00:18:50 -080068 struct iocb iocb, *iocbp = &iocb;
69 char c;
70 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070071
Jeff Diked50084a2006-01-06 00:18:50 -080072 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
73 .aio_reqprio = 0,
74 .aio_fildes = fd,
75 .aio_buf = (unsigned long) buf,
76 .aio_nbytes = len,
77 .aio_offset = offset,
78 .aio_reserved1 = 0,
79 .aio_reserved2 = 0,
80 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070081
Jeff Diked50084a2006-01-06 00:18:50 -080082 switch(type){
83 case AIO_READ:
84 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
85 err = io_submit(ctx, 1, &iocbp);
86 break;
87 case AIO_WRITE:
88 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_MMAP:
92 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
93 iocb.aio_buf = (unsigned long) &c;
94 iocb.aio_nbytes = sizeof(c);
95 err = io_submit(ctx, 1, &iocbp);
96 break;
97 default:
98 printk("Bogus op in do_aio - %d\n", type);
99 err = -EINVAL;
100 break;
101 }
Jeff Dike09ace812005-09-03 15:57:46 -0700102
Jeff Diked50084a2006-01-06 00:18:50 -0800103 if(err > 0)
104 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700105 else
106 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700107
Jeff Diked50084a2006-01-06 00:18:50 -0800108 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700109}
110
/*
 * Host AIO context shared by init_aio_26(), submit_aio_26() and
 * aio_thread().  It starts out as zero (static storage), is filled in by
 * io_setup() in an initcall and is not changed afterwards.
 */
static aio_context_t ctx;
113
114static int aio_thread(void *arg)
115{
Jeff Diked50084a2006-01-06 00:18:50 -0800116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700119
Jeff Diked50084a2006-01-06 00:18:50 -0800120 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
Jeff Dikea61f3342007-05-06 14:51:35 -0700135 err = write(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800136 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400137 printk("aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700138 "err = %d\n", reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800139 }
140 }
141 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700142}
143
144#endif
145
Jeff Dike91acb212005-10-10 23:10:32 -0400146static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700147{
Jeff Diked50084a2006-01-06 00:18:50 -0800148 char c;
Jeff Dikeef0470c2007-05-06 14:51:33 -0700149 unsigned long long actual;
Jeff Dikea61f3342007-05-06 14:51:35 -0700150 int n;
Jeff Dike75e55842005-09-03 15:57:45 -0700151
Jeff Dikeef0470c2007-05-06 14:51:33 -0700152 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
153 if(actual != req->offset)
154 return -errno;
155
Jeff Diked50084a2006-01-06 00:18:50 -0800156 switch(req->type){
157 case AIO_READ:
Jeff Dikea61f3342007-05-06 14:51:35 -0700158 n = read(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800159 break;
160 case AIO_WRITE:
Jeff Dikea61f3342007-05-06 14:51:35 -0700161 n = write(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800162 break;
163 case AIO_MMAP:
Jeff Dikea61f3342007-05-06 14:51:35 -0700164 n = read(req->io_fd, &c, sizeof(c));
Jeff Diked50084a2006-01-06 00:18:50 -0800165 break;
166 default:
167 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dikea61f3342007-05-06 14:51:35 -0700168 return -EINVAL;
Jeff Diked50084a2006-01-06 00:18:50 -0800169 }
Jeff Dike75e55842005-09-03 15:57:45 -0700170
Jeff Dikea61f3342007-05-06 14:51:35 -0700171 if(n < 0)
172 return -errno;
173 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700174}
175
/*
 * State of the helper-thread machinery.  Everything here is set up in
 * initcalls and not changed afterwards; -1 means "not set up".
 */
static int aio_req_fd_r = -1;	/* not_aio_thread() reads requests from this */
static int aio_req_fd_w = -1;	/* submit_aio_24() writes requests to this */
static int aio_pid = -1;	/* pid of the helper thread, see exit_aio() */
180
Jeff Dike75e55842005-09-03 15:57:45 -0700181static int not_aio_thread(void *arg)
182{
Jeff Diked50084a2006-01-06 00:18:50 -0800183 struct aio_thread_req req;
184 struct aio_thread_reply reply;
185 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700186
Jeff Diked50084a2006-01-06 00:18:50 -0800187 signal(SIGWINCH, SIG_IGN);
188 while(1){
Jeff Dikea61f3342007-05-06 14:51:35 -0700189 err = read(aio_req_fd_r, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800190 if(err != sizeof(req)){
191 if(err < 0)
192 printk("not_aio_thread - read failed, "
193 "fd = %d, err = %d\n", aio_req_fd_r,
Jeff Dikea61f3342007-05-06 14:51:35 -0700194 errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800195 else {
196 printk("not_aio_thread - short read, fd = %d, "
197 "length = %d\n", aio_req_fd_r, err);
198 }
199 continue;
200 }
201 err = do_not_aio(&req);
202 reply = ((struct aio_thread_reply) { .data = req.aio,
Jeff Dikeef0470c2007-05-06 14:51:33 -0700203 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700204 err = write(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800205 if(err != sizeof(reply))
206 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700207 "err = %d\n", req.aio->reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800208 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800209
210 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700211}
212
Jeff Dike75e55842005-09-03 15:57:45 -0700213static int init_aio_24(void)
214{
Jeff Diked50084a2006-01-06 00:18:50 -0800215 unsigned long stack;
216 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700217
Jeff Diked50084a2006-01-06 00:18:50 -0800218 err = os_pipe(fds, 1, 1);
219 if(err)
220 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700221
Jeff Diked50084a2006-01-06 00:18:50 -0800222 aio_req_fd_w = fds[0];
223 aio_req_fd_r = fds[1];
224 err = run_helper_thread(not_aio_thread, NULL,
225 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
226 if(err < 0)
227 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700228
Jeff Diked50084a2006-01-06 00:18:50 -0800229 aio_pid = err;
230 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700231
Jeff Diked50084a2006-01-06 00:18:50 -0800232out_close_pipe:
233 os_close_file(fds[0]);
234 os_close_file(fds[1]);
235 aio_req_fd_w = -1;
236 aio_req_fd_r = -1;
237out:
Jeff Dike75e55842005-09-03 15:57:45 -0700238#ifndef HAVE_AIO_ABI
239 printk("/usr/include/linux/aio_abi.h not present during build\n");
240#endif
241 printk("2.6 host AIO support not used - falling back to I/O "
242 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800243 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700244}
245
246#ifdef HAVE_AIO_ABI
247#define DEFAULT_24_AIO 0
248static int init_aio_26(void)
249{
Jeff Diked50084a2006-01-06 00:18:50 -0800250 unsigned long stack;
251 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700252
Jeff Diked50084a2006-01-06 00:18:50 -0800253 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700254 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800255 printk("aio_thread failed to initialize context, err = %d\n",
256 errno);
257 return err;
258 }
Jeff Dike75e55842005-09-03 15:57:45 -0700259
Jeff Diked50084a2006-01-06 00:18:50 -0800260 err = run_helper_thread(aio_thread, NULL,
261 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
262 if(err < 0)
263 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700264
Jeff Diked50084a2006-01-06 00:18:50 -0800265 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700266
267 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800268 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700269}
270
Jeff Dike91acb212005-10-10 23:10:32 -0400271static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
272 unsigned long long offset, struct aio_context *aio)
273{
Jeff Diked50084a2006-01-06 00:18:50 -0800274 struct aio_thread_reply reply;
275 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400276
Jeff Diked50084a2006-01-06 00:18:50 -0800277 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
278 if(err){
279 reply = ((struct aio_thread_reply) { .data = aio,
280 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700281 err = write(aio->reply_fd, &reply, sizeof(reply));
282 if(err != sizeof(reply)){
283 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800284 printk("submit_aio_26 - write failed, "
285 "fd = %d, err = %d\n", aio->reply_fd, -err);
Jeff Dikea61f3342007-05-06 14:51:35 -0700286 }
Jeff Diked50084a2006-01-06 00:18:50 -0800287 else err = 0;
288 }
Jeff Dike91acb212005-10-10 23:10:32 -0400289
Jeff Diked50084a2006-01-06 00:18:50 -0800290 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400291}
292
Jeff Dike75e55842005-09-03 15:57:45 -0700293#else
294#define DEFAULT_24_AIO 1
/*
 * Placeholder for builds without HAVE_AIO_ABI: host AIO cannot be set up,
 * so always report -ENOSYS.
 */
static int init_aio_26(void)
{
	const int unsupported = -ENOSYS;

	return unsupported;
}
299
Jeff Dike91acb212005-10-10 23:10:32 -0400300static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
301 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700302{
Jeff Diked50084a2006-01-06 00:18:50 -0800303 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700304}
305#endif
306
Jeff Dike9683da92007-02-10 01:44:27 -0800307/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700308static int aio_24 = DEFAULT_24_AIO;
309
310static int __init set_aio_24(char *name, int *add)
311{
Jeff Diked50084a2006-01-06 00:18:50 -0800312 aio_24 = 1;
313 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700314}
315
316__uml_setup("aio=2.4", set_aio_24,
317"aio=2.4\n"
318" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
319" available. 2.4 AIO is a single thread that handles one request at a\n"
320" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
321" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
322" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
323" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
324" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
325" your /usr/include/linux in order to build an AIO-capable UML\n\n"
326);
327
328static int init_aio(void)
329{
Jeff Diked50084a2006-01-06 00:18:50 -0800330 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700331
Jeff Diked50084a2006-01-06 00:18:50 -0800332 CHOOSE_MODE(({ if(!aio_24){
333 printk("Disabling 2.6 AIO in tt mode\n");
334 aio_24 = 1;
335 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700336
Jeff Diked50084a2006-01-06 00:18:50 -0800337 if(!aio_24){
338 err = init_aio_26();
339 if(err && (errno == ENOSYS)){
340 printk("2.6 AIO not supported on the host - "
341 "reverting to 2.4 AIO\n");
342 aio_24 = 1;
343 }
344 else return err;
345 }
Jeff Dike75e55842005-09-03 15:57:45 -0700346
Jeff Diked50084a2006-01-06 00:18:50 -0800347 if(aio_24)
348 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700349
Jeff Diked50084a2006-01-06 00:18:50 -0800350 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700351}
352
353/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
354 * needs to be called when the kernel is running because it calls run_helper,
355 * which needs get_free_page. exit_aio is a __uml_exitcall because the generic
356 * kernel does not run __exitcalls on shutdown, and can't because many of them
357 * break when called outside of module unloading.
358 */
359__initcall(init_aio);
360
361static void exit_aio(void)
362{
Jeff Diked50084a2006-01-06 00:18:50 -0800363 if(aio_pid != -1)
364 os_kill_process(aio_pid, 1);
Jeff Dike75e55842005-09-03 15:57:45 -0700365}
366
367__uml_exitcall(exit_aio);
368
Jeff Dike91acb212005-10-10 23:10:32 -0400369static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
370 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700371{
Jeff Diked50084a2006-01-06 00:18:50 -0800372 struct aio_thread_req req = { .type = type,
373 .io_fd = io_fd,
374 .offset = offset,
375 .buf = buf,
376 .len = len,
377 .aio = aio,
378 };
379 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400380
Jeff Dikea61f3342007-05-06 14:51:35 -0700381 err = write(aio_req_fd_w, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800382 if(err == sizeof(req))
383 err = 0;
Jeff Dikea61f3342007-05-06 14:51:35 -0700384 else err = -errno;
Jeff Dike91acb212005-10-10 23:10:32 -0400385
Jeff Diked50084a2006-01-06 00:18:50 -0800386 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400387}
388
389int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800390 unsigned long long offset, int reply_fd,
391 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400392{
Jeff Diked50084a2006-01-06 00:18:50 -0800393 aio->reply_fd = reply_fd;
394 if(aio_24)
395 return submit_aio_24(type, io_fd, buf, len, offset, aio);
396 else {
397 return submit_aio_26(type, io_fd, buf, len, offset, aio);
398 }
Jeff Dike75e55842005-09-03 15:57:45 -0700399}