| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 2 | * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 3 | * Licensed under the GPL | 
|  | 4 | */ | 
|  | 5 |  | 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <string.h>
#include <sys/resource.h>
#include "as-layout.h"
#include "init.h"
#include "kern_constants.h"
#include "kern_util.h"
#include "os.h"
#include "um_malloc.h"
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 19 |  | 
/*
 * File-local constants.  STACKSIZE is the ceiling that set_stklim() applies
 * to the soft RLIMIT_STACK value.
 */
#define PGD_BOUND	(4 << 20)	/* 4 * 1024 * 1024 */
#define STACKSIZE	(8 << 20)	/* 8 * 1024 * 1024 */
#define THREAD_NAME_LEN	256
|  | 23 |  | 
|  | 24 | static void set_stklim(void) | 
|  | 25 | { | 
|  | 26 | struct rlimit lim; | 
|  | 27 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 28 | if (getrlimit(RLIMIT_STACK, &lim) < 0) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 29 | perror("getrlimit"); | 
|  | 30 | exit(1); | 
|  | 31 | } | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 32 | if ((lim.rlim_cur == RLIM_INFINITY) || (lim.rlim_cur > STACKSIZE)) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 33 | lim.rlim_cur = STACKSIZE; | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 34 | if (setrlimit(RLIMIT_STACK, &lim) < 0) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 35 | perror("setrlimit"); | 
|  | 36 | exit(1); | 
|  | 37 | } | 
|  | 38 | } | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 39 | } | 
|  | 40 |  | 
|  | 41 | static __init void do_uml_initcalls(void) | 
|  | 42 | { | 
|  | 43 | initcall_t *call; | 
|  | 44 |  | 
|  | 45 | call = &__uml_initcall_start; | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 46 | while (call < &__uml_initcall_end) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 47 | (*call)(); | 
|  | 48 | call++; | 
|  | 49 | } | 
|  | 50 | } | 
|  | 51 |  | 
/*
 * Signal handler of last resort: run uml_cleanup() and then terminate the
 * process with exit status 1.  The signal number itself is not used.
 */
static void last_ditch_exit(int sig)
{
	(void) sig;

	uml_cleanup();
	exit(1);
}
|  | 57 |  | 
| Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 58 | static void install_fatal_handler(int sig) | 
|  | 59 | { | 
|  | 60 | struct sigaction action; | 
|  | 61 |  | 
|  | 62 | /* All signals are enabled in this handler ... */ | 
|  | 63 | sigemptyset(&action.sa_mask); | 
|  | 64 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 65 | /* | 
|  | 66 | * ... including the signal being handled, plus we want the | 
| Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 67 | * handler reset to the default behavior, so that if an exit | 
|  | 68 | * handler is hanging for some reason, the UML will just die | 
|  | 69 | * after this signal is sent a second time. | 
|  | 70 | */ | 
|  | 71 | action.sa_flags = SA_RESETHAND | SA_NODEFER; | 
|  | 72 | action.sa_restorer = NULL; | 
|  | 73 | action.sa_handler = last_ditch_exit; | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 74 | if (sigaction(sig, &action, NULL) < 0) { | 
| Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 75 | printf("failed to install handler for signal %d - errno = %d\n", | 
| WANG Cong | c9a3072 | 2008-02-04 22:30:35 -0800 | [diff] [blame] | 76 | sig, errno); | 
| Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 77 | exit(1); | 
|  | 78 | } | 
|  | 79 | } | 
|  | 80 |  | 
#define UML_LIB_PATH	":/usr/lib/uml"

/*
 * Make sure /usr/lib/uml is on PATH.
 *
 * If PATH is unset or empty it is replaced by a built-in default that ends
 * in /usr/lib/uml; otherwise UML_LIB_PATH is appended to the current value.
 * All failures are reported with perror() and are otherwise ignored, leaving
 * the environment as it was.
 */
static void setup_env_path(void)
{
	const char *current = getenv("PATH");
	char *entry;
	size_t entry_size;

	/* No usable PATH: install the default one and be done. */
	if (current == NULL || *current == '\0') {
		if (putenv("PATH=:/bin:/usr/bin/" UML_LIB_PATH) != 0)
			perror("couldn't putenv");
		return;
	}

	/* sizeof on the literal already accounts for the trailing NUL. */
	entry_size = strlen(current) + sizeof("PATH=" UML_LIB_PATH);
	entry = malloc(entry_size);
	if (entry == NULL) {
		perror("couldn't malloc to set a new PATH");
		return;
	}
	snprintf(entry, entry_size, "PATH=%s" UML_LIB_PATH, current);

	/*
	 * On success the string handed to putenv() becomes part of the
	 * environment, so it must only be freed when putenv() failed.
	 */
	if (putenv(entry) == 0)
		return;

	perror("couldn't putenv to set a new PATH");
	free(entry);
}
|  | 113 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 114 | extern void scan_elf_aux( char **envp); | 
|  | 115 |  | 
| Jeff Dike | 36e4546 | 2007-05-06 14:51:11 -0700 | [diff] [blame] | 116 | int __init main(int argc, char **argv, char **envp) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 117 | { | 
|  | 118 | char **new_argv; | 
| Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 119 | int ret, i, err; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 120 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 121 | set_stklim(); | 
|  | 122 |  | 
| Mattia Dongili | cb98cdc | 2006-05-01 12:16:01 -0700 | [diff] [blame] | 123 | setup_env_path(); | 
|  | 124 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 125 | new_argv = malloc((argc + 1) * sizeof(char *)); | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 126 | if (new_argv == NULL) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 127 | perror("Mallocing argv"); | 
|  | 128 | exit(1); | 
|  | 129 | } | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 130 | for (i = 0; i < argc; i++) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 131 | new_argv[i] = strdup(argv[i]); | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 132 | if (new_argv[i] == NULL) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 133 | perror("Mallocing an arg"); | 
|  | 134 | exit(1); | 
|  | 135 | } | 
|  | 136 | } | 
|  | 137 | new_argv[argc] = NULL; | 
|  | 138 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 139 | /* | 
|  | 140 | * Allow these signals to bring down a UML if all other | 
| Jeff Dike | 4b84c69 | 2006-09-25 23:33:04 -0700 | [diff] [blame] | 141 | * methods of control fail. | 
|  | 142 | */ | 
|  | 143 | install_fatal_handler(SIGINT); | 
|  | 144 | install_fatal_handler(SIGTERM); | 
|  | 145 | install_fatal_handler(SIGHUP); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 146 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 147 | scan_elf_aux(envp); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 148 |  | 
|  | 149 | do_uml_initcalls(); | 
|  | 150 | ret = linux_main(argc, argv); | 
|  | 151 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 152 | /* | 
|  | 153 | * Disable SIGPROF - I have no idea why libc doesn't do this or turn | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 154 | * off the profiling time, but UML dies with a SIGPROF just before | 
|  | 155 | * exiting when profiling is active. | 
|  | 156 | */ | 
|  | 157 | change_sig(SIGPROF, 0); | 
|  | 158 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 159 | /* | 
|  | 160 | * This signal stuff used to be in the reboot case.  However, | 
| Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 161 | * sometimes a SIGVTALRM can come in when we're halting (reproducably | 
|  | 162 | * when writing out gcov information, presumably because that takes | 
|  | 163 | * some time) and cause a segfault. | 
|  | 164 | */ | 
| Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 165 |  | 
| Jeff Dike | 61b63c5 | 2007-10-16 01:27:27 -0700 | [diff] [blame] | 166 | /* stop timers and set SIGVTALRM to be ignored */ | 
| Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 167 | disable_timer(); | 
| Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 168 |  | 
| Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 169 | /* disable SIGIO for the fds and set SIGIO to be ignored */ | 
|  | 170 | err = deactivate_all_fds(); | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 171 | if (err) | 
| Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 172 | printf("deactivate_all_fds failed, errno = %d\n", -err); | 
| Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 173 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 174 | /* | 
|  | 175 | * Let any pending signals fire now.  This ensures | 
| Jeff Dike | 52c653b | 2005-11-07 00:58:50 -0800 | [diff] [blame] | 176 | * that they won't be delivered after the exec, when | 
|  | 177 | * they are definitely not expected. | 
|  | 178 | */ | 
|  | 179 | unblock_signals(); | 
| Jeff Dike | 92515da | 2005-05-28 15:51:56 -0700 | [diff] [blame] | 180 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 181 | /* Reboot */ | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 182 | if (ret) { | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | printf("\n"); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 184 | execvp(new_argv[0], new_argv); | 
|  | 185 | perror("Failed to exec kernel"); | 
|  | 186 | ret = 1; | 
|  | 187 | } | 
|  | 188 | printf("\n"); | 
| Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 189 | return uml_exitcode; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 190 | } | 
|  | 191 |  | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 192 | extern void *__real_malloc(int); | 
|  | 193 |  | 
|  | 194 | void *__wrap_malloc(int size) | 
|  | 195 | { | 
|  | 196 | void *ret; | 
|  | 197 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 198 | if (!kmalloc_ok) | 
| Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 199 | return __real_malloc(size); | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 200 | else if (size <= UM_KERN_PAGE_SIZE) | 
| Jeff Dike | c539ab73 | 2007-06-16 10:16:09 -0700 | [diff] [blame] | 201 | /* finding contiguous pages can be hard*/ | 
| Jeff Dike | 43f5b30 | 2008-05-12 14:01:52 -0700 | [diff] [blame] | 202 | ret = uml_kmalloc(size, UM_GFP_KERNEL); | 
| Jeff Dike | e4c4bf99 | 2007-07-15 23:38:56 -0700 | [diff] [blame] | 203 | else ret = vmalloc(size); | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 204 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 205 | /* | 
|  | 206 | * glibc people insist that if malloc fails, errno should be | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 207 | * set by malloc as well. So we do. | 
|  | 208 | */ | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 209 | if (ret == NULL) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 210 | errno = ENOMEM; | 
|  | 211 |  | 
| Jeff Dike | a5ed1ff | 2007-05-06 14:50:58 -0700 | [diff] [blame] | 212 | return ret; | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 213 | } | 
|  | 214 |  | 
/*
 * calloc() replacement built on top of __wrap_malloc().
 *
 * @n:    number of elements
 * @size: size of one element in bytes
 *
 * Returns zero-filled memory of n * size bytes, or NULL on failure.  A
 * negative argument, or a product that does not fit in an int, is treated
 * as an allocation failure: NULL is returned with errno set to ENOMEM
 * instead of letting the multiplication overflow and handing back a buffer
 * that is smaller than the caller asked for.
 */
void *__wrap_calloc(int n, int size)
{
	void *ptr;
	int total;

	/* Refuse anything whose byte count cannot be represented. */
	if (n < 0 || size < 0 || (size != 0 && n > INT_MAX / size)) {
		errno = ENOMEM;
		return NULL;
	}

	total = n * size;
	ptr = __wrap_malloc(total);
	if (ptr == NULL)
		return NULL;

	memset(ptr, 0, total);
	return ptr;
}
|  | 224 |  | 
|  | 225 | extern void __real_free(void *); | 
|  | 226 |  | 
|  | 227 | extern unsigned long high_physmem; | 
|  | 228 |  | 
|  | 229 | void __wrap_free(void *ptr) | 
|  | 230 | { | 
|  | 231 | unsigned long addr = (unsigned long) ptr; | 
|  | 232 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 233 | /* | 
|  | 234 | * We need to know how the allocation happened, so it can be correctly | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 235 | * freed.  This is done by seeing what region of memory the pointer is | 
|  | 236 | * in - | 
|  | 237 | * 	physical memory - kmalloc/kfree | 
|  | 238 | *	kernel virtual memory - vmalloc/vfree | 
|  | 239 | * 	anywhere else - malloc/free | 
|  | 240 | * If kmalloc is not yet possible, then either high_physmem and/or | 
|  | 241 | * end_vm are still 0 (as at startup), in which case we call free, or | 
|  | 242 | * we have set them, but anyway addr has not been allocated from those | 
|  | 243 | * areas. So, in both cases __real_free is called. | 
|  | 244 | * | 
|  | 245 | * CAN_KMALLOC is checked because it would be bad to free a buffer | 
|  | 246 | * with kmalloc/vmalloc after they have been turned off during | 
|  | 247 | * shutdown. | 
|  | 248 | * XXX: However, we sometimes shutdown CAN_KMALLOC temporarily, so | 
|  | 249 | * there is a possibility for memory leaks. | 
|  | 250 | */ | 
|  | 251 |  | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 252 | if ((addr >= uml_physmem) && (addr < high_physmem)) { | 
|  | 253 | if (kmalloc_ok) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 254 | kfree(ptr); | 
|  | 255 | } | 
| Jeff Dike | ba180fd | 2007-10-16 01:27:00 -0700 | [diff] [blame] | 256 | else if ((addr >= start_vm) && (addr < end_vm)) { | 
|  | 257 | if (kmalloc_ok) | 
| Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 258 | vfree(ptr); | 
|  | 259 | } | 
|  | 260 | else __real_free(ptr); | 
|  | 261 | } |