blob: c1f0f76291cf3c8ab5a159582bd942d97e2c2ff8 [file] [log] [blame]
Jeff Dike75e55842005-09-03 15:57:45 -07001/*
2 * Copyright (C) 2004 Jeff Dike (jdike@addtoit.com)
3 * Licensed under the GPL
4 */
5
6#include <stdlib.h>
7#include <unistd.h>
8#include <signal.h>
9#include <errno.h>
10#include <sched.h>
11#include <sys/syscall.h>
12#include "os.h"
Jeff Dike75e55842005-09-03 15:57:45 -070013#include "aio.h"
14#include "init.h"
15#include "user.h"
16#include "mode.h"
17
Jeff Dike91acb212005-10-10 23:10:32 -040018struct aio_thread_req {
Jeff Diked50084a2006-01-06 00:18:50 -080019 enum aio_type type;
20 int io_fd;
21 unsigned long long offset;
22 char *buf;
23 int len;
24 struct aio_context *aio;
Jeff Dike91acb212005-10-10 23:10:32 -040025};
26
Jeff Dike75e55842005-09-03 15:57:45 -070027#if defined(HAVE_AIO_ABI)
28#include <linux/aio_abi.h>
29
30/* If we have the headers, we are going to build with AIO enabled.
31 * If we don't have aio in libc, we define the necessary stubs here.
32 */
33
34#if !defined(HAVE_AIO_LIBC)
35
/* Thin wrapper around the raw io_setup system call, used when libc has no
 * AIO stubs.  Returns whatever syscall() returns.
 */
static long io_setup(int n, aio_context_t *ctxp)
{
	long ret = syscall(__NR_io_setup, n, ctxp);

	return ret;
}
40
/* Thin wrapper around the raw io_submit system call, used when libc has no
 * AIO stubs.  Returns whatever syscall() returns.
 */
static long io_submit(aio_context_t ctx, long nr, struct iocb **iocbpp)
{
	long ret = syscall(__NR_io_submit, ctx, nr, iocbpp);

	return ret;
}
45
/* Thin wrapper around the raw io_getevents system call, used when libc has
 * no AIO stubs.  Returns whatever syscall() returns.
 */
static long io_getevents(aio_context_t ctx_id, long min_nr, long nr,
			 struct io_event *events, struct timespec *timeout)
{
	long ret = syscall(__NR_io_getevents, ctx_id, min_nr, nr, events,
			   timeout);

	return ret;
}
51
52#endif
53
54/* The AIO_MMAP cases force the mmapped page into memory here
55 * rather than in whatever place first touches the data. I used
56 * to do this by touching the page, but that's delicate because
57 * gcc is prone to optimizing that away. So, what's done here
58 * is we read from the descriptor from which the page was
59 * mapped. The caller is required to pass an offset which is
60 * inside the page that was mapped. Thus, when the read
61 * returns, we know that the page is in the page cache, and
62 * that it now backs the mmapped area.
63 */
64
Jeff Dike91acb212005-10-10 23:10:32 -040065static int do_aio(aio_context_t ctx, enum aio_type type, int fd, char *buf,
Jeff Diked50084a2006-01-06 00:18:50 -080066 int len, unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -070067{
Jeff Diked50084a2006-01-06 00:18:50 -080068 struct iocb iocb, *iocbp = &iocb;
69 char c;
70 int err;
Jeff Dike75e55842005-09-03 15:57:45 -070071
Jeff Diked50084a2006-01-06 00:18:50 -080072 iocb = ((struct iocb) { .aio_data = (unsigned long) aio,
73 .aio_reqprio = 0,
74 .aio_fildes = fd,
75 .aio_buf = (unsigned long) buf,
76 .aio_nbytes = len,
77 .aio_offset = offset,
78 .aio_reserved1 = 0,
79 .aio_reserved2 = 0,
80 .aio_reserved3 = 0 });
Jeff Dike75e55842005-09-03 15:57:45 -070081
Jeff Diked50084a2006-01-06 00:18:50 -080082 switch(type){
83 case AIO_READ:
84 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
85 err = io_submit(ctx, 1, &iocbp);
86 break;
87 case AIO_WRITE:
88 iocb.aio_lio_opcode = IOCB_CMD_PWRITE;
89 err = io_submit(ctx, 1, &iocbp);
90 break;
91 case AIO_MMAP:
92 iocb.aio_lio_opcode = IOCB_CMD_PREAD;
93 iocb.aio_buf = (unsigned long) &c;
94 iocb.aio_nbytes = sizeof(c);
95 err = io_submit(ctx, 1, &iocbp);
96 break;
97 default:
98 printk("Bogus op in do_aio - %d\n", type);
99 err = -EINVAL;
100 break;
101 }
Jeff Dike09ace812005-09-03 15:57:46 -0700102
Jeff Diked50084a2006-01-06 00:18:50 -0800103 if(err > 0)
104 err = 0;
Jeff Dike2867ace2005-09-16 19:27:51 -0700105 else
106 err = -errno;
Jeff Dike75e55842005-09-03 15:57:45 -0700107
Jeff Diked50084a2006-01-06 00:18:50 -0800108 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700109}
110
/* Host AIO context shared by init_aio_26, aio_thread and submit_aio_26.
 * Set up in an initcall and left alone afterwards.
 */
static aio_context_t ctx;
113
114static int aio_thread(void *arg)
115{
Jeff Diked50084a2006-01-06 00:18:50 -0800116 struct aio_thread_reply reply;
117 struct io_event event;
118 int err, n, reply_fd;
Jeff Dike75e55842005-09-03 15:57:45 -0700119
Jeff Diked50084a2006-01-06 00:18:50 -0800120 signal(SIGWINCH, SIG_IGN);
Jeff Dike75e55842005-09-03 15:57:45 -0700121
Jeff Diked50084a2006-01-06 00:18:50 -0800122 while(1){
123 n = io_getevents(ctx, 1, 1, &event, NULL);
124 if(n < 0){
125 if(errno == EINTR)
126 continue;
127 printk("aio_thread - io_getevents failed, "
128 "errno = %d\n", errno);
129 }
130 else {
131 reply = ((struct aio_thread_reply)
132 { .data = (void *) (long) event.data,
133 .err = event.res });
Jeff Dike91acb212005-10-10 23:10:32 -0400134 reply_fd = ((struct aio_context *) reply.data)->reply_fd;
135 err = os_write_file(reply_fd, &reply, sizeof(reply));
Jeff Diked50084a2006-01-06 00:18:50 -0800136 if(err != sizeof(reply))
Jeff Dike91acb212005-10-10 23:10:32 -0400137 printk("aio_thread - write failed, fd = %d, "
Jeff Dike9683da92007-02-10 01:44:27 -0800138 "err = %d\n", reply_fd, -err);
Jeff Diked50084a2006-01-06 00:18:50 -0800139 }
140 }
141 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700142}
143
144#endif
145
Jeff Dike91acb212005-10-10 23:10:32 -0400146static int do_not_aio(struct aio_thread_req *req)
Jeff Dike75e55842005-09-03 15:57:45 -0700147{
Jeff Diked50084a2006-01-06 00:18:50 -0800148 char c;
Jeff Dikeef0470c2007-05-06 14:51:33 -0700149 unsigned long long actual;
Jeff Diked50084a2006-01-06 00:18:50 -0800150 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700151
Jeff Dikeef0470c2007-05-06 14:51:33 -0700152 actual = lseek64(req->io_fd, req->offset, SEEK_SET);
153 if(actual != req->offset)
154 return -errno;
155
Jeff Diked50084a2006-01-06 00:18:50 -0800156 switch(req->type){
157 case AIO_READ:
Jeff Diked50084a2006-01-06 00:18:50 -0800158 err = os_read_file(req->io_fd, req->buf, req->len);
159 break;
160 case AIO_WRITE:
Jeff Diked50084a2006-01-06 00:18:50 -0800161 err = os_write_file(req->io_fd, req->buf, req->len);
162 break;
163 case AIO_MMAP:
Jeff Diked50084a2006-01-06 00:18:50 -0800164 err = os_read_file(req->io_fd, &c, sizeof(c));
165 break;
166 default:
167 printk("do_not_aio - bad request type : %d\n", req->type);
168 err = -EINVAL;
169 break;
170 }
Jeff Dike75e55842005-09-03 15:57:45 -0700171
Jeff Diked50084a2006-01-06 00:18:50 -0800172 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700173}
174
/* Set up by the initcalls and not modified afterwards; -1 means "not set".
 * aio_req_fd_w is where submit_aio_24 writes requests, aio_req_fd_r is where
 * not_aio_thread reads them, and aio_pid is the helper thread exit_aio kills.
 */
static int aio_req_fd_r = -1;
static int aio_req_fd_w = -1;
static int aio_pid = -1;
179
Jeff Dike75e55842005-09-03 15:57:45 -0700180static int not_aio_thread(void *arg)
181{
Jeff Diked50084a2006-01-06 00:18:50 -0800182 struct aio_thread_req req;
183 struct aio_thread_reply reply;
184 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700185
Jeff Diked50084a2006-01-06 00:18:50 -0800186 signal(SIGWINCH, SIG_IGN);
187 while(1){
188 err = os_read_file(aio_req_fd_r, &req, sizeof(req));
189 if(err != sizeof(req)){
190 if(err < 0)
191 printk("not_aio_thread - read failed, "
192 "fd = %d, err = %d\n", aio_req_fd_r,
193 -err);
194 else {
195 printk("not_aio_thread - short read, fd = %d, "
196 "length = %d\n", aio_req_fd_r, err);
197 }
198 continue;
199 }
200 err = do_not_aio(&req);
201 reply = ((struct aio_thread_reply) { .data = req.aio,
Jeff Dikeef0470c2007-05-06 14:51:33 -0700202 .err = err });
Jeff Diked50084a2006-01-06 00:18:50 -0800203 err = os_write_file(req.aio->reply_fd, &reply, sizeof(reply));
204 if(err != sizeof(reply))
205 printk("not_aio_thread - write failed, fd = %d, "
Jeff Dike9683da92007-02-10 01:44:27 -0800206 "err = %d\n", req.aio->reply_fd, -err);
Jeff Diked50084a2006-01-06 00:18:50 -0800207 }
Jeff Dike1b57e9c2006-01-06 00:18:49 -0800208
209 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700210}
211
Jeff Dike75e55842005-09-03 15:57:45 -0700212static int init_aio_24(void)
213{
Jeff Diked50084a2006-01-06 00:18:50 -0800214 unsigned long stack;
215 int fds[2], err;
Jeff Dike75e55842005-09-03 15:57:45 -0700216
Jeff Diked50084a2006-01-06 00:18:50 -0800217 err = os_pipe(fds, 1, 1);
218 if(err)
219 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700220
Jeff Diked50084a2006-01-06 00:18:50 -0800221 aio_req_fd_w = fds[0];
222 aio_req_fd_r = fds[1];
223 err = run_helper_thread(not_aio_thread, NULL,
224 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
225 if(err < 0)
226 goto out_close_pipe;
Jeff Dike75e55842005-09-03 15:57:45 -0700227
Jeff Diked50084a2006-01-06 00:18:50 -0800228 aio_pid = err;
229 goto out;
Jeff Dike75e55842005-09-03 15:57:45 -0700230
Jeff Diked50084a2006-01-06 00:18:50 -0800231out_close_pipe:
232 os_close_file(fds[0]);
233 os_close_file(fds[1]);
234 aio_req_fd_w = -1;
235 aio_req_fd_r = -1;
236out:
Jeff Dike75e55842005-09-03 15:57:45 -0700237#ifndef HAVE_AIO_ABI
238 printk("/usr/include/linux/aio_abi.h not present during build\n");
239#endif
240 printk("2.6 host AIO support not used - falling back to I/O "
241 "thread\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800242 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700243}
244
245#ifdef HAVE_AIO_ABI
246#define DEFAULT_24_AIO 0
247static int init_aio_26(void)
248{
Jeff Diked50084a2006-01-06 00:18:50 -0800249 unsigned long stack;
250 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700251
Jeff Diked50084a2006-01-06 00:18:50 -0800252 if(io_setup(256, &ctx)){
Jeff Dikeb4fd3102005-09-16 19:27:49 -0700253 err = -errno;
Jeff Diked50084a2006-01-06 00:18:50 -0800254 printk("aio_thread failed to initialize context, err = %d\n",
255 errno);
256 return err;
257 }
Jeff Dike75e55842005-09-03 15:57:45 -0700258
Jeff Diked50084a2006-01-06 00:18:50 -0800259 err = run_helper_thread(aio_thread, NULL,
260 CLONE_FILES | CLONE_VM | SIGCHLD, &stack, 0);
261 if(err < 0)
262 return err;
Jeff Dike75e55842005-09-03 15:57:45 -0700263
Jeff Diked50084a2006-01-06 00:18:50 -0800264 aio_pid = err;
Jeff Dike75e55842005-09-03 15:57:45 -0700265
266 printk("Using 2.6 host AIO\n");
Jeff Diked50084a2006-01-06 00:18:50 -0800267 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700268}
269
Jeff Dike91acb212005-10-10 23:10:32 -0400270static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
271 unsigned long long offset, struct aio_context *aio)
272{
Jeff Diked50084a2006-01-06 00:18:50 -0800273 struct aio_thread_reply reply;
274 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400275
Jeff Diked50084a2006-01-06 00:18:50 -0800276 err = do_aio(ctx, type, io_fd, buf, len, offset, aio);
277 if(err){
278 reply = ((struct aio_thread_reply) { .data = aio,
279 .err = err });
280 err = os_write_file(aio->reply_fd, &reply, sizeof(reply));
281 if(err != sizeof(reply))
282 printk("submit_aio_26 - write failed, "
283 "fd = %d, err = %d\n", aio->reply_fd, -err);
284 else err = 0;
285 }
Jeff Dike91acb212005-10-10 23:10:32 -0400286
Jeff Diked50084a2006-01-06 00:18:50 -0800287 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400288}
289
Jeff Dike75e55842005-09-03 15:57:45 -0700290#else
291#define DEFAULT_24_AIO 1
/* Stand-in used when the build has no aio_abi.h: 2.6 AIO is reported as
 * unsupported.
 */
static int init_aio_26(void)
{
	return -ENOSYS;
}
296
Jeff Dike91acb212005-10-10 23:10:32 -0400297static int submit_aio_26(enum aio_type type, int io_fd, char *buf, int len,
298 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700299{
Jeff Diked50084a2006-01-06 00:18:50 -0800300 return -ENOSYS;
Jeff Dike75e55842005-09-03 15:57:45 -0700301}
302#endif
303
Jeff Dike9683da92007-02-10 01:44:27 -0800304/* Initialized in an initcall and unchanged thereafter */
Jeff Dike75e55842005-09-03 15:57:45 -0700305static int aio_24 = DEFAULT_24_AIO;
306
307static int __init set_aio_24(char *name, int *add)
308{
Jeff Diked50084a2006-01-06 00:18:50 -0800309 aio_24 = 1;
310 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700311}
312
313__uml_setup("aio=2.4", set_aio_24,
314"aio=2.4\n"
315" This is used to force UML to use 2.4-style AIO even when 2.6 AIO is\n"
316" available. 2.4 AIO is a single thread that handles one request at a\n"
317" time, synchronously. 2.6 AIO is a thread which uses the 2.6 AIO \n"
318" interface to handle an arbitrary number of pending requests. 2.6 AIO \n"
319" is not available in tt mode, on 2.4 hosts, or when UML is built with\n"
320" /usr/include/linux/aio_abi.h not available. Many distributions don't\n"
321" include aio_abi.h, so you will need to copy it from a kernel tree to\n"
322" your /usr/include/linux in order to build an AIO-capable UML\n\n"
323);
324
325static int init_aio(void)
326{
Jeff Diked50084a2006-01-06 00:18:50 -0800327 int err;
Jeff Dike75e55842005-09-03 15:57:45 -0700328
Jeff Diked50084a2006-01-06 00:18:50 -0800329 CHOOSE_MODE(({ if(!aio_24){
330 printk("Disabling 2.6 AIO in tt mode\n");
331 aio_24 = 1;
332 } }), (void) 0);
Jeff Dike75e55842005-09-03 15:57:45 -0700333
Jeff Diked50084a2006-01-06 00:18:50 -0800334 if(!aio_24){
335 err = init_aio_26();
336 if(err && (errno == ENOSYS)){
337 printk("2.6 AIO not supported on the host - "
338 "reverting to 2.4 AIO\n");
339 aio_24 = 1;
340 }
341 else return err;
342 }
Jeff Dike75e55842005-09-03 15:57:45 -0700343
Jeff Diked50084a2006-01-06 00:18:50 -0800344 if(aio_24)
345 return init_aio_24();
Jeff Dike75e55842005-09-03 15:57:45 -0700346
Jeff Diked50084a2006-01-06 00:18:50 -0800347 return 0;
Jeff Dike75e55842005-09-03 15:57:45 -0700348}
349
/* The reason for the __initcall/__uml_exitcall asymmetry is that init_aio
 * needs to be called when the kernel is running because it calls
 * run_helper_thread, which needs get_free_page.  exit_aio is a
 * __uml_exitcall because the generic kernel does not run __exitcalls on
 * shutdown, and can't because many of them break when called outside of
 * module unloading.
 */
356__initcall(init_aio);
357
358static void exit_aio(void)
359{
Jeff Diked50084a2006-01-06 00:18:50 -0800360 if(aio_pid != -1)
361 os_kill_process(aio_pid, 1);
Jeff Dike75e55842005-09-03 15:57:45 -0700362}
363
364__uml_exitcall(exit_aio);
365
Jeff Dike91acb212005-10-10 23:10:32 -0400366static int submit_aio_24(enum aio_type type, int io_fd, char *buf, int len,
367 unsigned long long offset, struct aio_context *aio)
Jeff Dike75e55842005-09-03 15:57:45 -0700368{
Jeff Diked50084a2006-01-06 00:18:50 -0800369 struct aio_thread_req req = { .type = type,
370 .io_fd = io_fd,
371 .offset = offset,
372 .buf = buf,
373 .len = len,
374 .aio = aio,
375 };
376 int err;
Jeff Dike91acb212005-10-10 23:10:32 -0400377
Jeff Diked50084a2006-01-06 00:18:50 -0800378 err = os_write_file(aio_req_fd_w, &req, sizeof(req));
379 if(err == sizeof(req))
380 err = 0;
Jeff Dike91acb212005-10-10 23:10:32 -0400381
Jeff Diked50084a2006-01-06 00:18:50 -0800382 return err;
Jeff Dike91acb212005-10-10 23:10:32 -0400383}
384
385int submit_aio(enum aio_type type, int io_fd, char *buf, int len,
Jeff Diked50084a2006-01-06 00:18:50 -0800386 unsigned long long offset, int reply_fd,
387 struct aio_context *aio)
Jeff Dike91acb212005-10-10 23:10:32 -0400388{
Jeff Diked50084a2006-01-06 00:18:50 -0800389 aio->reply_fd = reply_fd;
390 if(aio_24)
391 return submit_aio_24(type, io_fd, buf, len, offset, aio);
392 else {
393 return submit_aio_26(type, io_fd, buf, len, offset, aio);
394 }
Jeff Dike75e55842005-09-03 15:57:45 -0700395}