Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 1 | /* |
| 2 | * linux/arch/i386/kernel/sysenter.c |
| 3 | * |
| 4 | * (C) Copyright 2002 Linus Torvalds |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 5 | * Portions based on the vdso-randomization code from exec-shield: |
| 6 | * Copyright(C) 2005-2006, Red Hat, Inc., Ingo Molnar |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 7 | * |
| 8 | * This file contains the needed initializations to support sysenter. |
| 9 | */ |
| 10 | |
| 11 | #include <linux/init.h> |
| 12 | #include <linux/smp.h> |
| 13 | #include <linux/thread_info.h> |
| 14 | #include <linux/sched.h> |
| 15 | #include <linux/gfp.h> |
| 16 | #include <linux/string.h> |
| 17 | #include <linux/elf.h> |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 18 | #include <linux/mm.h> |
| 19 | #include <linux/module.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 20 | |
| 21 | #include <asm/cpufeature.h> |
| 22 | #include <asm/msr.h> |
| 23 | #include <asm/pgtable.h> |
| 24 | #include <asm/unistd.h> |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 25 | #include <asm/elf.h> |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 26 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 27 | /* |
| 28 | * Should the kernel map a VDSO page into processes and pass its |
| 29 | * address down to glibc upon exec()? |
| 30 | */ |
Andi Kleen | 3bbf547 | 2006-12-07 02:14:08 +0100 | [diff] [blame] | 31 | #ifdef CONFIG_PARAVIRT |
| 32 | unsigned int __read_mostly vdso_enabled = 0; |
| 33 | #else |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 34 | unsigned int __read_mostly vdso_enabled = 1; |
Andi Kleen | 3bbf547 | 2006-12-07 02:14:08 +0100 | [diff] [blame] | 35 | #endif |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 36 | |
| 37 | EXPORT_SYMBOL_GPL(vdso_enabled); |
| 38 | |
| 39 | static int __init vdso_setup(char *s) |
| 40 | { |
| 41 | vdso_enabled = simple_strtoul(s, NULL, 0); |
| 42 | |
| 43 | return 1; |
| 44 | } |
| 45 | |
| 46 | __setup("vdso=", vdso_setup); |
| 47 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 48 | extern asmlinkage void sysenter_entry(void); |
| 49 | |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 50 | #ifdef CONFIG_COMPAT_VDSO |
| 51 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, |
| 52 | unsigned offset, unsigned size) |
| 53 | { |
| 54 | Elf32_Sym *sym = (void *)ehdr + offset; |
| 55 | unsigned nsym = size / sizeof(*sym); |
| 56 | unsigned i; |
| 57 | |
| 58 | for(i = 0; i < nsym; i++, sym++) { |
| 59 | if (sym->st_shndx == SHN_UNDEF || |
| 60 | sym->st_shndx == SHN_ABS) |
| 61 | continue; /* skip */ |
| 62 | |
| 63 | if (sym->st_shndx > SHN_LORESERVE) { |
| 64 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", |
| 65 | sym->st_shndx); |
| 66 | continue; |
| 67 | } |
| 68 | |
| 69 | switch(ELF_ST_TYPE(sym->st_info)) { |
| 70 | case STT_OBJECT: |
| 71 | case STT_FUNC: |
| 72 | case STT_SECTION: |
| 73 | case STT_FILE: |
| 74 | sym->st_value += VDSO_HIGH_BASE; |
| 75 | } |
| 76 | } |
| 77 | } |
| 78 | |
| 79 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) |
| 80 | { |
| 81 | Elf32_Dyn *dyn = (void *)ehdr + offset; |
| 82 | |
| 83 | for(; dyn->d_tag != DT_NULL; dyn++) |
| 84 | switch(dyn->d_tag) { |
| 85 | case DT_PLTGOT: |
| 86 | case DT_HASH: |
| 87 | case DT_STRTAB: |
| 88 | case DT_SYMTAB: |
| 89 | case DT_RELA: |
| 90 | case DT_INIT: |
| 91 | case DT_FINI: |
| 92 | case DT_REL: |
| 93 | case DT_DEBUG: |
| 94 | case DT_JMPREL: |
| 95 | case DT_VERSYM: |
| 96 | case DT_VERDEF: |
| 97 | case DT_VERNEED: |
| 98 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: |
| 99 | /* definitely pointers needing relocation */ |
| 100 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; |
| 101 | break; |
| 102 | |
| 103 | case DT_ENCODING ... OLD_DT_LOOS-1: |
| 104 | case DT_LOOS ... DT_HIOS-1: |
| 105 | /* Tags above DT_ENCODING are pointers if |
| 106 | they're even */ |
| 107 | if (dyn->d_tag >= DT_ENCODING && |
| 108 | (dyn->d_tag & 1) == 0) |
| 109 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; |
| 110 | break; |
| 111 | |
| 112 | case DT_VERDEFNUM: |
| 113 | case DT_VERNEEDNUM: |
| 114 | case DT_FLAGS_1: |
| 115 | case DT_RELACOUNT: |
| 116 | case DT_RELCOUNT: |
| 117 | case DT_VALRNGLO ... DT_VALRNGHI: |
| 118 | /* definitely not pointers */ |
| 119 | break; |
| 120 | |
| 121 | case OLD_DT_LOOS ... DT_LOOS-1: |
| 122 | case DT_HIOS ... DT_VALRNGLO-1: |
| 123 | default: |
| 124 | if (dyn->d_tag > DT_ENCODING) |
| 125 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", |
| 126 | dyn->d_tag); |
| 127 | break; |
| 128 | } |
| 129 | } |
| 130 | |
| 131 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) |
| 132 | { |
| 133 | Elf32_Phdr *phdr; |
| 134 | Elf32_Shdr *shdr; |
| 135 | int i; |
| 136 | |
| 137 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || |
| 138 | !elf_check_arch(ehdr) || |
| 139 | ehdr->e_type != ET_DYN); |
| 140 | |
| 141 | ehdr->e_entry += VDSO_HIGH_BASE; |
| 142 | |
| 143 | /* rebase phdrs */ |
| 144 | phdr = (void *)ehdr + ehdr->e_phoff; |
| 145 | for (i = 0; i < ehdr->e_phnum; i++) { |
| 146 | phdr[i].p_vaddr += VDSO_HIGH_BASE; |
| 147 | |
| 148 | /* relocate dynamic stuff */ |
| 149 | if (phdr[i].p_type == PT_DYNAMIC) |
| 150 | reloc_dyn(ehdr, phdr[i].p_offset); |
| 151 | } |
| 152 | |
| 153 | /* rebase sections */ |
| 154 | shdr = (void *)ehdr + ehdr->e_shoff; |
| 155 | for(i = 0; i < ehdr->e_shnum; i++) { |
| 156 | if (!(shdr[i].sh_flags & SHF_ALLOC)) |
| 157 | continue; |
| 158 | |
| 159 | shdr[i].sh_addr += VDSO_HIGH_BASE; |
| 160 | |
| 161 | if (shdr[i].sh_type == SHT_SYMTAB || |
| 162 | shdr[i].sh_type == SHT_DYNSYM) |
| 163 | reloc_symtab(ehdr, shdr[i].sh_offset, |
| 164 | shdr[i].sh_size); |
| 165 | } |
| 166 | } |
| 167 | #else |
| 168 | static inline void relocate_vdso(Elf32_Ehdr *ehdr) |
| 169 | { |
| 170 | } |
| 171 | #endif /* COMPAT_VDSO */ |
| 172 | |
Li Shaohua | 6fe940d | 2005-06-25 14:54:53 -0700 | [diff] [blame] | 173 | void enable_sep_cpu(void) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 174 | { |
| 175 | int cpu = get_cpu(); |
| 176 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
| 177 | |
Li Shaohua | 6fe940d | 2005-06-25 14:54:53 -0700 | [diff] [blame] | 178 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
| 179 | put_cpu(); |
| 180 | return; |
| 181 | } |
| 182 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 183 | tss->ss1 = __KERNEL_CS; |
| 184 | tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; |
| 185 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
| 186 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); |
| 187 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); |
| 188 | put_cpu(); |
| 189 | } |
| 190 | |
/*
 * Bounds of the ELF DSO images contained in vsyscall.o; the symbols
 * themselves are defined there.
 */
extern const char vsyscall_int80_start;
extern const char vsyscall_int80_end;
extern const char vsyscall_sysenter_start;
extern const char vsyscall_sysenter_end;

/* The single page holding the selected image; set up by sysenter_setup(). */
static struct page *syscall_pages[1];
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 198 | |
Jeremy Fitzhardinge | a6c4e07 | 2007-05-02 19:27:12 +0200 | [diff] [blame] | 199 | int __init sysenter_setup(void) |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 200 | { |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 201 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 202 | const void *vsyscall; |
| 203 | size_t vsyscall_len; |
| 204 | |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 205 | syscall_pages[0] = virt_to_page(syscall_page); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 206 | |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 207 | #ifdef CONFIG_COMPAT_VDSO |
Jan Beulich | 22c5ace | 2007-02-13 13:26:26 +0100 | [diff] [blame] | 208 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 209 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 210 | #endif |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 211 | |
| 212 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 213 | vsyscall = &vsyscall_int80_start; |
| 214 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
| 215 | } else { |
| 216 | vsyscall = &vsyscall_sysenter_start; |
| 217 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 218 | } |
| 219 | |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 220 | memcpy(syscall_page, vsyscall, vsyscall_len); |
| 221 | relocate_vdso(syscall_page); |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 222 | |
Linus Torvalds | 1da177e | 2005-04-16 15:20:36 -0700 | [diff] [blame] | 223 | return 0; |
| 224 | } |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 225 | |
/*
 * Defined in vsyscall-sysenter.S.  Only the symbol's address is used
 * here, translated through VDSO_SYM().
 */
extern void SYSENTER_RETURN;
| 228 | |
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 229 | #ifdef __HAVE_ARCH_GATE_AREA |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 230 | /* Setup a VMA at program startup for the vsyscall page */ |
| 231 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) |
| 232 | { |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 233 | struct mm_struct *mm = current->mm; |
| 234 | unsigned long addr; |
| 235 | int ret; |
| 236 | |
| 237 | down_write(&mm->mmap_sem); |
| 238 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
| 239 | if (IS_ERR_VALUE(addr)) { |
| 240 | ret = addr; |
| 241 | goto up_fail; |
| 242 | } |
| 243 | |
Roland McGrath | f47aef5 | 2007-01-26 00:56:49 -0800 | [diff] [blame] | 244 | /* |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 245 | * MAYWRITE to allow gdb to COW and set breakpoints |
| 246 | * |
Roland McGrath | f47aef5 | 2007-01-26 00:56:49 -0800 | [diff] [blame] | 247 | * Make sure the vDSO gets into every core dump. |
| 248 | * Dumping its contents makes post-mortem fully interpretable later |
| 249 | * without matching up the same kernel and hardware config to see |
| 250 | * what PC values meant. |
| 251 | */ |
Roland McGrath | 7d91d53 | 2007-02-08 14:20:42 -0800 | [diff] [blame] | 252 | ret = install_special_mapping(mm, addr, PAGE_SIZE, |
| 253 | VM_READ|VM_EXEC| |
| 254 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| |
| 255 | VM_ALWAYSDUMP, |
| 256 | syscall_pages); |
| 257 | if (ret) |
pageexec@freemail.hu | 79bc79b | 2006-06-28 20:44:16 +0200 | [diff] [blame] | 258 | goto up_fail; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 259 | |
| 260 | current->mm->context.vdso = (void *)addr; |
| 261 | current_thread_info()->sysenter_return = |
| 262 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 263 | up_fail: |
| 264 | up_write(&mm->mmap_sem); |
| 265 | return ret; |
Ingo Molnar | e6e5494 | 2006-06-27 02:53:50 -0700 | [diff] [blame] | 266 | } |
| 267 | |
| 268 | const char *arch_vma_name(struct vm_area_struct *vma) |
| 269 | { |
| 270 | if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) |
| 271 | return "[vdso]"; |
| 272 | return NULL; |
| 273 | } |
| 274 | |
/* No gate VMA is provided in this configuration: always returns NULL. */
struct vm_area_struct *get_gate_vma(struct task_struct *tsk)
{
	return NULL;
}
| 279 | |
/* No address is treated as part of a gate area: always returns 0. */
int in_gate_area(struct task_struct *task, unsigned long addr)
{
	return 0;
}
| 284 | |
/* Task-less variant of in_gate_area(): always returns 0 as well. */
int in_gate_area_no_task(unsigned long addr)
{
	return 0;
}
Jeremy Fitzhardinge | d4f7a2c | 2007-05-02 19:27:12 +0200 | [diff] [blame^] | 289 | #else /* !__HAVE_ARCH_GATE_AREA */ |
| 290 | int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) |
| 291 | { |
| 292 | /* |
| 293 | * If not creating userspace VMA, simply set vdso to point to |
| 294 | * fixmap page. |
| 295 | */ |
| 296 | current->mm->context.vdso = (void *)VDSO_HIGH_BASE; |
| 297 | current_thread_info()->sysenter_return = |
| 298 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
| 299 | |
| 300 | return 0; |
| 301 | } |
| 302 | #endif /* __HAVE_ARCH_GATE_AREA */ |