blob: 9bf944f6a1dbe75639e4ae029938929c7b0d03d0 [file] [log] [blame]
/*
 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
 * Licensed under the GPL
 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027#if defined(HAVE_AIO_ABI)
28#include <linux/aio_abi.h>
29
/* If we have the headers, we are going to build with AIO enabled.
 * If we don't have aio in libc, we define the necessary stubs here.
 */
33
34#if !defined(HAVE_AIO_LIBC)
35
/*
 * Stand-in for the libc io_setup() wrapper: invokes the io_setup system
 * call directly with the caller's arguments and hands back whatever
 * syscall() returned.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret;

	ret = syscall(__NR_io_setup, n, ctxp);
	return ret;
}
40
/*
 * Stand-in for the libc io_submit() wrapper: invokes the io_submit system
 * call directly with the caller's arguments and hands back whatever
 * syscall() returned.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret;

	ret = syscall(__NR_io_submit, ctx, nr, iocbpp);
	return ret;
}
45
/*
 * Stand-in for the libc io_getevents() wrapper: invokes the io_getevents
 * system call directly with the caller's arguments and hands back
 * whatever syscall() returned.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret;

	ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events, timeout);
	return ret;
}
51
52#endif
53
/* The AIO_MMAP cases force the mmapped page into memory here
 * rather than in whatever place first touches the data.  I used
 * to do this by touching the page, but that's delicate because
 * gcc is prone to optimizing that away.  So, what's done here
 * is we read from the descriptor from which the page was
 * mapped.  The caller is required to pass an offset which is
 * inside the page that was mapped.  Thus, when the read
 * returns, we know that the page is in the page cache, and
 * that it now backs the mmapped area.
 */
64
Jeff Dike91acb212005-10-10 23:10:32 -040065static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080066 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070067{
Jeff Diked50084a2006-01-06 00:18:50 -080068 struct iocb iocb, *iocbp = &iocb;
69 char c;
70 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070071
Jeff Diked50084a2006-01-06 00:18:50 -080072 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
73 .aio_reqprio = 0,
74 .aio_fildes = fd,
75 .aio_buf = (unsigned long) buf,
76 .aio_nbytes = len,
77 .aio_offset = offset,
78 .aio_reserved1 = 0,
79 .aio_reserved2 = 0,
80 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070081
Jeff Diked50084a2006-01-06 00:18:50 -080082 switch(type){
83 case AIO_READ:
84 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
85 err = io_submit(ctx, 1, &iocbp);
86 break;
87 case AIO_WRITE:
88 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_MMAP:
92 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
93 iocb.aio_buf = (unsigned long) &c;
94 iocb.aio_nbytes = sizeof(c);
95 err = io_submit(ctx, 1, &iocbp);
96 break;
97 default:
98 printk("Bogus op in do_aio - %d\n", type);
99 err = -EINVAL;
100 break;
101 }
Jeff Dike09ace812005-09-03 15:57:46 -0700102
Jeff Diked50084a2006-01-06 00:18:50 -0800103 if(err > 0)
104 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700105 else
106 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700107
Jeff Diked50084a2006-01-06 00:18:50 -0800108 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700109}
110
/*
 * Host AIO context.  Filled in by io_setup() from an initcall and left
 * unchanged thereafter; static storage makes it start out as 0.
 */
static aio_context_t ctx;
113
114static int aio_thread(void *arg)
115{
Jeff Diked50084a2006-01-06 00:18:50 -0800116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700119
Jeff Diked50084a2006-01-06 00:18:50 -0800120 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
Jeff Dikea61f3342007-05-06 14:51:35 -0700135 err = write(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800136 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400137 printk("aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700138 "err = %d\n", reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800139 }
140 }
141 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700142}
143
144#endif
145
Jeff Dike91acb212005-10-10 23:10:32 -0400146static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700147{
Jeff Diked50084a2006-01-06 00:18:50 -0800148 char c;
Jeff Dikeef0470c2007-05-06 14:51:33 -0700149 unsigned long long actual;
Jeff Dikea61f3342007-05-06 14:51:35 -0700150 int n;
Jeff Dike75e55842005-09-03 15:57:45 -0700151
Jeff Dikeef0470c2007-05-06 14:51:33 -0700152 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
153 if(actual != req->offset)
154 return -errno;
155
Jeff Diked50084a2006-01-06 00:18:50 -0800156 switch(req->type){
157 case AIO_READ:
Jeff Dikea61f3342007-05-06 14:51:35 -0700158 n = read(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800159 break;
160 case AIO_WRITE:
Jeff Dikea61f3342007-05-06 14:51:35 -0700161 n = write(req->io_fd, req->buf, req->len);
Jeff Diked50084a2006-01-06 00:18:50 -0800162 break;
163 case AIO_MMAP:
Jeff Dikea61f3342007-05-06 14:51:35 -0700164 n = read(req->io_fd, &c, sizeof(c));
Jeff Diked50084a2006-01-06 00:18:50 -0800165 break;
166 default:
167 printk("do_not_aio - bad request type : %d\n", req->type);
Jeff Dikea61f3342007-05-06 14:51:35 -0700168 return -EINVAL;
Jeff Diked50084a2006-01-06 00:18:50 -0800169 }
Jeff Dike75e55842005-09-03 15:57:45 -0700170
Jeff Dikea61f3342007-05-06 14:51:35 -0700171 if(n < 0)
172 return -errno;
173 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700174}
175
/*
 * Set up from initcalls and not changed afterwards; each holds -1 until
 * then.
 *
 * aio_req_fd_r - descriptor not_aio_thread() reads requests from
 * aio_req_fd_w - descriptor submit_aio_24() writes requests to
 * aio_pid      - value returned by run_helper_thread() for the helper,
 *                used by exit_aio() to kill it
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
static int aio_pid = -1;
180
Jeff Dike75e55842005-09-03 15:57:45 -0700181static int not_aio_thread(void *arg)
182{
Jeff Diked50084a2006-01-06 00:18:50 -0800183 struct aio_thread_req req;
184 struct aio_thread_reply reply;
185 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700186
Jeff Diked50084a2006-01-06 00:18:50 -0800187 signal(SIGWINCH, SIG_IGN);
188 while(1){
Jeff Dikea61f3342007-05-06 14:51:35 -0700189 err = read(aio_req_fd_r, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800190 if(err != sizeof(req)){
191 if(err < 0)
192 printk("not_aio_thread - read failed, "
193 "fd = %d, err = %d\n", aio_req_fd_r,
Jeff Dikea61f3342007-05-06 14:51:35 -0700194 errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800195 else {
196 printk("not_aio_thread - short read, fd = %d, "
197 "length = %d\n", aio_req_fd_r, err);
198 }
199 continue;
200 }
201 err = do_not_aio(&req);
202 reply = ((struct aio_thread_reply) { .data = req.aio,
Jeff Dikeef0470c2007-05-06 14:51:33 -0700203 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700204 err = write(req.aio->reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800205 if(err != sizeof(reply))
206 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dikea61f3342007-05-06 14:51:35 -0700207 "err = %d\n", req.aio->reply_fd, errno);
Jeff Diked50084a2006-01-06 00:18:50 -0800208 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800209
210 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700211}
212
Jeff Dike75e55842005-09-03 15:57:45 -0700213static int init_aio_24(void)
214{
Jeff Diked50084a2006-01-06 00:18:50 -0800215 unsigned long stack;
216 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700217
Jeff Diked50084a2006-01-06 00:18:50 -0800218 err = os_pipe(fds, 1, 1);
219 if(err)
220 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700221
Jeff Diked50084a2006-01-06 00:18:50 -0800222 aio_req_fd_w = fds[0];
223 aio_req_fd_r = fds[1];
Jeff Dike8603ec82007-05-06 14:51:44 -0700224
225 err = os_set_fd_block(aio_req_fd_w, 0);
226 if(err)
227 goto out_close_pipe;
228
Jeff Diked50084a2006-01-06 00:18:50 -0800229 err = run_helper_thread(not_aio_thread, NULL,
230 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
231 if(err < 0)
232 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700233
Jeff Diked50084a2006-01-06 00:18:50 -0800234 aio_pid = err;
235 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700236
Jeff Diked50084a2006-01-06 00:18:50 -0800237out_close_pipe:
238 os_close_file(fds[0]);
239 os_close_file(fds[1]);
240 aio_req_fd_w = -1;
241 aio_req_fd_r = -1;
242out:
Jeff Dike75e55842005-09-03 15:57:45 -0700243#ifndef HAVE_AIO_ABI
244 printk("/usr/include/linux/aio_abi.h not present during build\n");
245#endif
246 printk("2.6 host AIO support not used - falling back to I/O "
247 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800248 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700249}
250
251#ifdef HAVE_AIO_ABI
252#define DEFAULT_24_AIO 0
253static int init_aio_26(void)
254{
Jeff Diked50084a2006-01-06 00:18:50 -0800255 unsigned long stack;
256 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700257
Jeff Diked50084a2006-01-06 00:18:50 -0800258 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700259 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800260 printk("aio_thread failed to initialize context, err = %d\n",
261 errno);
262 return err;
263 }
Jeff Dike75e55842005-09-03 15:57:45 -0700264
Jeff Diked50084a2006-01-06 00:18:50 -0800265 err = run_helper_thread(aio_thread, NULL,
266 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
267 if(err < 0)
268 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700269
Jeff Diked50084a2006-01-06 00:18:50 -0800270 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700271
272 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800273 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700274}
275
Jeff Dike91acb212005-10-10 23:10:32 -0400276static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
277 unsigned long long offset, struct aio_context *aio)
278{
Jeff Diked50084a2006-01-06 00:18:50 -0800279 struct aio_thread_reply reply;
280 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400281
Jeff Diked50084a2006-01-06 00:18:50 -0800282 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
283 if(err){
284 reply = ((struct aio_thread_reply) { .data = aio,
285 .err = err });
Jeff Dikea61f3342007-05-06 14:51:35 -0700286 err = write(aio->reply_fd, &reply, sizeof(reply));
287 if(err != sizeof(reply)){
288 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800289 printk("submit_aio_26 - write failed, "
290 "fd = %d, err = %d\n", aio->reply_fd, -err);
Jeff Dikea61f3342007-05-06 14:51:35 -0700291 }
Jeff Diked50084a2006-01-06 00:18:50 -0800292 else err = 0;
293 }
Jeff Dike91acb212005-10-10 23:10:32 -0400294
Jeff Diked50084a2006-01-06 00:18:50 -0800295 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400296}
297
Jeff Dike75e55842005-09-03 15:57:45 -0700298#else
299#define DEFAULT_24_AIO 1
/*
 * Version used when HAVE_AIO_ABI is not defined: 2.6 AIO cannot be set
 * up, so always report -ENOSYS.
 */
static int init_aio_26(void)
{
	return -ENOSYS;
}
304
Jeff Dike91acb212005-10-10 23:10:32 -0400305static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
306 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700307{
Jeff Diked50084a2006-01-06 00:18:50 -0800308 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700309}
310#endif
311
Jeff Dike9683da92007-02-10 01:44:27 -0800312/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700313static int aio_24 = DEFAULT_24_AIO;
314
315static int __init set_aio_24(char *name, int *add)
316{
Jeff Diked50084a2006-01-06 00:18:50 -0800317 aio_24 = 1;
318 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700319}
320
321__uml_setup("aio=2.4", set_aio_24,
322"aio=2.4\n"
323" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
324" available. 2.4 AIO is a single thread that handles one request at a\n"
325" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
326" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
327" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
328" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
329" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
330" your /usr/include/linux in order to build an AIO-capable UML\n\n"
331);
332
333static int init_aio(void)
334{
Jeff Diked50084a2006-01-06 00:18:50 -0800335 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700336
Jeff Diked50084a2006-01-06 00:18:50 -0800337 CHOOSE_MODE(({ if(!aio_24){
338 printk("Disabling 2.6 AIO in tt mode\n");
339 aio_24 = 1;
340 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700341
Jeff Diked50084a2006-01-06 00:18:50 -0800342 if(!aio_24){
343 err = init_aio_26();
344 if(err && (errno == ENOSYS)){
345 printk("2.6 AIO not supported on the host - "
346 "reverting to 2.4 AIO\n");
347 aio_24 = 1;
348 }
349 else return err;
350 }
Jeff Dike75e55842005-09-03 15:57:45 -0700351
Jeff Diked50084a2006-01-06 00:18:50 -0800352 if(aio_24)
353 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700354
Jeff Diked50084a2006-01-06 00:18:50 -0800355 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700356}
357
/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls run_helper,
 * which needs get_free_page.  exit_aio is a __uml_exitcall because the generic
 * kernel does not run __exitcalls on shutdown, and can't because many of them
 * break when called outside of module unloading.
 */
363 */
364__initcall(init_aio);
365
366static void exit_aio(void)
367{
Jeff Diked50084a2006-01-06 00:18:50 -0800368 if(aio_pid != -1)
369 os_kill_process(aio_pid, 1);
Jeff Dike75e55842005-09-03 15:57:45 -0700370}
371
372__uml_exitcall(exit_aio);
373
Jeff Dike91acb212005-10-10 23:10:32 -0400374static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
375 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700376{
Jeff Diked50084a2006-01-06 00:18:50 -0800377 struct aio_thread_req req = { .type = type,
378 .io_fd = io_fd,
379 .offset = offset,
380 .buf = buf,
381 .len = len,
382 .aio = aio,
383 };
384 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400385
Jeff Dikea61f3342007-05-06 14:51:35 -0700386 err = write(aio_req_fd_w, &req, sizeof(req));
Jeff Diked50084a2006-01-06 00:18:50 -0800387 if(err == sizeof(req))
388 err = 0;
Jeff Dikea61f3342007-05-06 14:51:35 -0700389 else err = -errno;
Jeff Dike91acb212005-10-10 23:10:32 -0400390
Jeff Diked50084a2006-01-06 00:18:50 -0800391 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400392}
393
394int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800395 unsigned long long offset, int reply_fd,
396 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400397{
Jeff Diked50084a2006-01-06 00:18:50 -0800398 aio->reply_fd = reply_fd;
399 if(aio_24)
400 return submit_aio_24(type, io_fd, buf, len, offset, aio);
401 else {
402 return submit_aio_26(type, io_fd, buf, len, offset, aio);
403 }
Jeff Dike75e55842005-09-03 15:57:45 -0700404}