Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) |
| 3 | * Licensed under the GPL |
| 4 | */ |
| 5 | |
| 6 | #include "linux/mm.h" |
| 7 | #include "linux/rbtree.h" |
| 8 | #include "linux/slab.h" |
| 9 | #include "linux/vmalloc.h" |
| 10 | #include "linux/bootmem.h" |
| 11 | #include "linux/module.h" |
| 12 | #include "asm/types.h" |
| 13 | #include "asm/pgtable.h" |
| 14 | #include "kern_util.h" |
| 15 | #include "user_util.h" |
| 16 | #include "mode_kern.h" |
| 17 | #include "mem.h" |
| 18 | #include "mem_user.h" |
| 19 | #include "os.h" |
| 20 | #include "kern.h" |
| 21 | #include "init.h" |
| 22 | |
| 23 | struct phys_desc { |
| 24 | struct rb_node rb; |
| 25 | int fd; |
| 26 | __u64 offset; |
| 27 | void *virt; |
| 28 | unsigned long phys; |
| 29 | struct list_head list; |
| 30 | }; |
| 31 | |
| 32 | static struct rb_root phys_mappings = RB_ROOT; |
| 33 | |
| 34 | static struct rb_node **find_rb(void *virt) |
| 35 | { |
| 36 | struct rb_node **n = &phys_mappings.rb_node; |
| 37 | struct phys_desc *d; |
| 38 | |
| 39 | while(*n != NULL){ |
| 40 | d = rb_entry(*n, struct phys_desc, rb); |
| 41 | if(d->virt == virt) |
| 42 | return(n); |
| 43 | |
| 44 | if(d->virt > virt) |
| 45 | n = &(*n)->rb_left; |
| 46 | else |
| 47 | n = &(*n)->rb_right; |
| 48 | } |
| 49 | |
| 50 | return(n); |
| 51 | } |
| 52 | |
| 53 | static struct phys_desc *find_phys_mapping(void *virt) |
| 54 | { |
| 55 | struct rb_node **n = find_rb(virt); |
| 56 | |
| 57 | if(*n == NULL) |
| 58 | return(NULL); |
| 59 | |
| 60 | return(rb_entry(*n, struct phys_desc, rb)); |
| 61 | } |
| 62 | |
| 63 | static void insert_phys_mapping(struct phys_desc *desc) |
| 64 | { |
| 65 | struct rb_node **n = find_rb(desc->virt); |
| 66 | |
| 67 | if(*n != NULL) |
| 68 | panic("Physical remapping for %p already present", |
| 69 | desc->virt); |
| 70 | |
| 71 | rb_link_node(&desc->rb, (*n)->rb_parent, n); |
| 72 | rb_insert_color(&desc->rb, &phys_mappings); |
| 73 | } |
| 74 | |
/* List of every struct desc_mapping, linked through ->list. */
LIST_HEAD(descriptor_mappings);
| 76 | |
| 77 | struct desc_mapping { |
| 78 | int fd; |
| 79 | struct list_head list; |
| 80 | struct list_head pages; |
| 81 | }; |
| 82 | |
| 83 | static struct desc_mapping *find_mapping(int fd) |
| 84 | { |
| 85 | struct desc_mapping *desc; |
| 86 | struct list_head *ele; |
| 87 | |
| 88 | list_for_each(ele, &descriptor_mappings){ |
| 89 | desc = list_entry(ele, struct desc_mapping, list); |
| 90 | if(desc->fd == fd) |
| 91 | return(desc); |
| 92 | } |
| 93 | |
| 94 | return(NULL); |
| 95 | } |
| 96 | |
| 97 | static struct desc_mapping *descriptor_mapping(int fd) |
| 98 | { |
| 99 | struct desc_mapping *desc; |
| 100 | |
| 101 | desc = find_mapping(fd); |
| 102 | if(desc != NULL) |
| 103 | return(desc); |
| 104 | |
| 105 | desc = kmalloc(sizeof(*desc), GFP_ATOMIC); |
| 106 | if(desc == NULL) |
| 107 | return(NULL); |
| 108 | |
| 109 | *desc = ((struct desc_mapping) |
| 110 | { .fd = fd, |
| 111 | .list = LIST_HEAD_INIT(desc->list), |
| 112 | .pages = LIST_HEAD_INIT(desc->pages) }); |
| 113 | list_add(&desc->list, &descriptor_mappings); |
| 114 | |
| 115 | return(desc); |
| 116 | } |
| 117 | |
| 118 | int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) |
| 119 | { |
| 120 | struct desc_mapping *fd_maps; |
| 121 | struct phys_desc *desc; |
| 122 | unsigned long phys; |
| 123 | int err; |
| 124 | |
| 125 | fd_maps = descriptor_mapping(fd); |
| 126 | if(fd_maps == NULL) |
| 127 | return(-ENOMEM); |
| 128 | |
| 129 | phys = __pa(virt); |
| 130 | desc = find_phys_mapping(virt); |
| 131 | if(desc != NULL) |
| 132 | panic("Address 0x%p is already substituted\n", virt); |
| 133 | |
| 134 | err = -ENOMEM; |
| 135 | desc = kmalloc(sizeof(*desc), GFP_ATOMIC); |
| 136 | if(desc == NULL) |
| 137 | goto out; |
| 138 | |
| 139 | *desc = ((struct phys_desc) |
| 140 | { .fd = fd, |
| 141 | .offset = offset, |
| 142 | .virt = virt, |
| 143 | .phys = __pa(virt), |
| 144 | .list = LIST_HEAD_INIT(desc->list) }); |
| 145 | insert_phys_mapping(desc); |
| 146 | |
| 147 | list_add(&desc->list, &fd_maps->pages); |
| 148 | |
| 149 | virt = (void *) ((unsigned long) virt & PAGE_MASK); |
| 150 | err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); |
| 151 | if(!err) |
| 152 | goto out; |
| 153 | |
| 154 | rb_erase(&desc->rb, &phys_mappings); |
| 155 | kfree(desc); |
| 156 | out: |
| 157 | return(err); |
| 158 | } |
| 159 | |
/* Descriptor of the physical memory file; set by setup_physmem(). */
static int physmem_fd = -1;
| 161 | |
| 162 | static void remove_mapping(struct phys_desc *desc) |
| 163 | { |
| 164 | void *virt = desc->virt; |
| 165 | int err; |
| 166 | |
| 167 | rb_erase(&desc->rb, &phys_mappings); |
| 168 | list_del(&desc->list); |
| 169 | kfree(desc); |
| 170 | |
| 171 | err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0); |
| 172 | if(err) |
| 173 | panic("Failed to unmap block device page from physical memory, " |
| 174 | "errno = %d", -err); |
| 175 | } |
| 176 | |
| 177 | int physmem_remove_mapping(void *virt) |
| 178 | { |
| 179 | struct phys_desc *desc; |
| 180 | |
| 181 | virt = (void *) ((unsigned long) virt & PAGE_MASK); |
| 182 | desc = find_phys_mapping(virt); |
| 183 | if(desc == NULL) |
| 184 | return(0); |
| 185 | |
| 186 | remove_mapping(desc); |
| 187 | return(1); |
| 188 | } |
| 189 | |
| 190 | void physmem_forget_descriptor(int fd) |
| 191 | { |
| 192 | struct desc_mapping *desc; |
| 193 | struct phys_desc *page; |
| 194 | struct list_head *ele, *next; |
| 195 | __u64 offset; |
| 196 | void *addr; |
| 197 | int err; |
| 198 | |
| 199 | desc = find_mapping(fd); |
| 200 | if(desc == NULL) |
| 201 | return; |
| 202 | |
| 203 | list_for_each_safe(ele, next, &desc->pages){ |
| 204 | page = list_entry(ele, struct phys_desc, list); |
| 205 | offset = page->offset; |
| 206 | addr = page->virt; |
| 207 | remove_mapping(page); |
| 208 | err = os_seek_file(fd, offset); |
| 209 | if(err) |
| 210 | panic("physmem_forget_descriptor - failed to seek " |
| 211 | "to %lld in fd %d, error = %d\n", |
| 212 | offset, fd, -err); |
| 213 | err = os_read_file(fd, addr, PAGE_SIZE); |
| 214 | if(err < 0) |
| 215 | panic("physmem_forget_descriptor - failed to read " |
| 216 | "from fd %d to 0x%p, error = %d\n", |
| 217 | fd, addr, -err); |
| 218 | } |
| 219 | |
| 220 | list_del(&desc->list); |
| 221 | kfree(desc); |
| 222 | } |
| 223 | |
/* Substitution interface exported for use by modules. */
EXPORT_SYMBOL(physmem_forget_descriptor);
EXPORT_SYMBOL(physmem_remove_mapping);
EXPORT_SYMBOL(physmem_subst_mapping);
| 227 | |
| 228 | void arch_free_page(struct page *page, int order) |
| 229 | { |
| 230 | void *virt; |
| 231 | int i; |
| 232 | |
| 233 | for(i = 0; i < (1 << order); i++){ |
| 234 | virt = __va(page_to_phys(page + i)); |
| 235 | physmem_remove_mapping(virt); |
| 236 | } |
| 237 | } |
| 238 | |
| 239 | int is_remapped(void *virt) |
| 240 | { |
| 241 | struct phys_desc *desc = find_phys_mapping(virt); |
| 242 | |
| 243 | return(desc != NULL); |
| 244 | } |
| 245 | |
/* Changed during early boot */
unsigned long high_physmem;

/* Defined elsewhere; assigned by the "mem=" option handler in this file. */
extern unsigned long physmem_size;
| 250 | |
| 251 | void *to_virt(unsigned long phys) |
| 252 | { |
| 253 | return((void *) uml_physmem + phys); |
| 254 | } |
| 255 | |
| 256 | unsigned long to_phys(void *virt) |
| 257 | { |
| 258 | return(((unsigned long) virt) - uml_physmem); |
| 259 | } |
| 260 | |
| 261 | int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) |
| 262 | { |
| 263 | struct page *p, *map; |
| 264 | unsigned long phys_len, phys_pages, highmem_len, highmem_pages; |
| 265 | unsigned long iomem_len, iomem_pages, total_len, total_pages; |
| 266 | int i; |
| 267 | |
| 268 | phys_pages = physmem >> PAGE_SHIFT; |
| 269 | phys_len = phys_pages * sizeof(struct page); |
| 270 | |
| 271 | iomem_pages = iomem >> PAGE_SHIFT; |
| 272 | iomem_len = iomem_pages * sizeof(struct page); |
| 273 | |
| 274 | highmem_pages = highmem >> PAGE_SHIFT; |
| 275 | highmem_len = highmem_pages * sizeof(struct page); |
| 276 | |
| 277 | total_pages = phys_pages + iomem_pages + highmem_pages; |
| 278 | total_len = phys_len + iomem_pages + highmem_len; |
| 279 | |
| 280 | if(kmalloc_ok){ |
| 281 | map = kmalloc(total_len, GFP_KERNEL); |
| 282 | if(map == NULL) |
| 283 | map = vmalloc(total_len); |
| 284 | } |
| 285 | else map = alloc_bootmem_low_pages(total_len); |
| 286 | |
| 287 | if(map == NULL) |
| 288 | return(-ENOMEM); |
| 289 | |
| 290 | for(i = 0; i < total_pages; i++){ |
| 291 | p = &map[i]; |
| 292 | set_page_count(p, 0); |
| 293 | SetPageReserved(p); |
| 294 | INIT_LIST_HEAD(&p->lru); |
| 295 | } |
| 296 | |
| 297 | max_mapnr = total_pages; |
| 298 | return(0); |
| 299 | } |
| 300 | |
| 301 | struct page *phys_to_page(const unsigned long phys) |
| 302 | { |
| 303 | return(&mem_map[phys >> PAGE_SHIFT]); |
| 304 | } |
| 305 | |
| 306 | struct page *__virt_to_page(const unsigned long virt) |
| 307 | { |
| 308 | return(&mem_map[__pa(virt) >> PAGE_SHIFT]); |
| 309 | } |
| 310 | |
| 311 | phys_t page_to_phys(struct page *page) |
| 312 | { |
| 313 | return((page - mem_map) << PAGE_SHIFT); |
| 314 | } |
| 315 | |
| 316 | pte_t mk_pte(struct page *page, pgprot_t pgprot) |
| 317 | { |
| 318 | pte_t pte; |
| 319 | |
| 320 | pte_set_val(pte, page_to_phys(page), pgprot); |
| 321 | if(pte_present(pte)) |
| 322 | pte_mknewprot(pte_mknewpage(pte)); |
| 323 | return(pte); |
| 324 | } |
| 325 | |
| 326 | /* Changed during early boot */ |
| 327 | static unsigned long kmem_top = 0; |
| 328 | |
| 329 | unsigned long get_kmem_end(void) |
| 330 | { |
| 331 | if(kmem_top == 0) |
| 332 | kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); |
| 333 | return(kmem_top); |
| 334 | } |
| 335 | |
| 336 | void map_memory(unsigned long virt, unsigned long phys, unsigned long len, |
| 337 | int r, int w, int x) |
| 338 | { |
| 339 | __u64 offset; |
| 340 | int fd, err; |
| 341 | |
| 342 | fd = phys_mapping(phys, &offset); |
| 343 | err = os_map_memory((void *) virt, fd, offset, len, r, w, x); |
| 344 | if(err) { |
| 345 | if(err == -ENOMEM) |
| 346 | printk("try increasing the host's " |
| 347 | "/proc/sys/vm/max_map_count to <physical " |
| 348 | "memory size>/4096\n"); |
| 349 | panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " |
| 350 | "err = %d\n", virt, fd, offset, len, r, w, x, err); |
| 351 | } |
| 352 | } |
| 353 | |
/* Page frame number of address x, rounded up to the next page boundary. */
#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
| 355 | |
| 356 | void setup_physmem(unsigned long start, unsigned long reserve_end, |
| 357 | unsigned long len, unsigned long highmem) |
| 358 | { |
| 359 | unsigned long reserve = reserve_end - start; |
| 360 | int pfn = PFN_UP(__pa(reserve_end)); |
| 361 | int delta = (len - reserve) >> PAGE_SHIFT; |
| 362 | int err, offset, bootmap_size; |
| 363 | |
| 364 | physmem_fd = create_mem_file(len + highmem); |
| 365 | |
| 366 | offset = uml_reserved - uml_physmem; |
| 367 | err = os_map_memory((void *) uml_reserved, physmem_fd, offset, |
| 368 | len - offset, 1, 1, 0); |
| 369 | if(err < 0){ |
| 370 | os_print_error(err, "Mapping memory"); |
| 371 | exit(1); |
| 372 | } |
| 373 | |
| 374 | bootmap_size = init_bootmem(pfn, pfn + delta); |
| 375 | free_bootmem(__pa(reserve_end) + bootmap_size, |
| 376 | len - bootmap_size - reserve); |
| 377 | } |
| 378 | |
| 379 | int phys_mapping(unsigned long phys, __u64 *offset_out) |
| 380 | { |
| 381 | struct phys_desc *desc = find_phys_mapping(__va(phys & PAGE_MASK)); |
| 382 | int fd = -1; |
| 383 | |
| 384 | if(desc != NULL){ |
| 385 | fd = desc->fd; |
| 386 | *offset_out = desc->offset; |
| 387 | } |
| 388 | else if(phys < physmem_size){ |
| 389 | fd = physmem_fd; |
| 390 | *offset_out = phys; |
| 391 | } |
| 392 | else if(phys < __pa(end_iomem)){ |
| 393 | struct iomem_region *region = iomem_regions; |
| 394 | |
| 395 | while(region != NULL){ |
| 396 | if((phys >= region->phys) && |
| 397 | (phys < region->phys + region->size)){ |
| 398 | fd = region->fd; |
| 399 | *offset_out = phys - region->phys; |
| 400 | break; |
| 401 | } |
| 402 | region = region->next; |
| 403 | } |
| 404 | } |
| 405 | else if(phys < __pa(end_iomem) + highmem){ |
| 406 | fd = physmem_fd; |
| 407 | *offset_out = phys - iomem_size; |
| 408 | } |
| 409 | |
| 410 | return(fd); |
| 411 | } |
| 412 | |
| 413 | static int __init uml_mem_setup(char *line, int *add) |
| 414 | { |
| 415 | char *retptr; |
| 416 | physmem_size = memparse(line,&retptr); |
| 417 | return 0; |
| 418 | } |
| 419 | __uml_setup("mem=", uml_mem_setup, |
| 420 | "mem=<Amount of desired ram>\n" |
| 421 | " This controls how much \"physical\" memory the kernel allocates\n" |
| 422 | " for the system. The size is specified as a number followed by\n" |
| 423 | " one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" |
| 424 | " This is not related to the amount of memory in the host. It can\n" |
| 425 | " be more, and the excess, if it's ever used, will just be swapped out.\n" |
| 426 | " Example: mem=64M\n\n" |
| 427 | ); |
| 428 | |
| 429 | unsigned long find_iomem(char *driver, unsigned long *len_out) |
| 430 | { |
| 431 | struct iomem_region *region = iomem_regions; |
| 432 | |
| 433 | while(region != NULL){ |
| 434 | if(!strcmp(region->driver, driver)){ |
| 435 | *len_out = region->size; |
| 436 | return(region->virt); |
| 437 | } |
| 438 | } |
| 439 | |
| 440 | return(0); |
| 441 | } |
| 442 | |
| 443 | int setup_iomem(void) |
| 444 | { |
| 445 | struct iomem_region *region = iomem_regions; |
| 446 | unsigned long iomem_start = high_physmem + PAGE_SIZE; |
| 447 | int err; |
| 448 | |
| 449 | while(region != NULL){ |
| 450 | err = os_map_memory((void *) iomem_start, region->fd, 0, |
| 451 | region->size, 1, 1, 0); |
| 452 | if(err) |
| 453 | printk("Mapping iomem region for driver '%s' failed, " |
| 454 | "errno = %d\n", region->driver, -err); |
| 455 | else { |
| 456 | region->virt = iomem_start; |
| 457 | region->phys = __pa(region->virt); |
| 458 | } |
| 459 | |
| 460 | iomem_start += region->size + PAGE_SIZE; |
| 461 | region = region->next; |
| 462 | } |
| 463 | |
| 464 | return(0); |
| 465 | } |
| 466 | |
| 467 | __initcall(setup_iomem); |
| 468 | |
| 469 | /* |
| 470 | * Overrides for Emacs so that we follow Linus's tabbing style. |
| 471 | * Emacs will notice this stuff at the end of the file and automatically |
| 472 | * adjust the settings for this buffer only. This must remain at the end |
| 473 | * of the file. |
| 474 | * --------------------------------------------------------------------------- |
| 475 | * Local variables: |
| 476 | * c-file-style: "linux" |
| 477 | * End: |
| 478 | */ |