blob: 10cd8ac3395a12705f8f5bb5a9ebed7c9ef9ff05 [file] [log] [blame]
/*
 *  Copyright (C) 2001 Andrea Arcangeli <andrea@suse.de> SuSE
 *  Copyright 2003 Andi Kleen, SuSE Labs.
 *
 *  [ NOTE: this mechanism is now deprecated in favor of the vDSO. ]
 *
 *  Thanks to hpa@transmeta.com for some useful hints.
 *  Special thanks to Ingo Molnar for his early experience with
 *  a different vsyscall implementation for Linux/IA32 and for the name.
 *
 *  vsyscall 1 is located at -10Mbyte, vsyscall 2 is located
 *  at virtual address -10Mbyte+1024bytes etc... There are at max 4
 *  vsyscalls. One vsyscall can reserve more than 1 slot to avoid
 *  jumping out of line if necessary. We cannot add more with this
 *  mechanism because older kernels won't return -ENOSYS.
 *
 *  Note: the concept clashes with user mode linux.  UML users should
 *  use the vDSO.
 */
20
Ingo Molnar2b7d0392008-11-12 13:17:38 +010021/* Disable profiling for userspace code: */
Steven Rostedt2ed84ee2008-11-12 15:24:24 -050022#define DISABLE_BRANCH_PROFILING
Steven Rostedt1f0d69a2008-11-12 00:14:39 -050023
Linus Torvalds1da177e2005-04-16 15:20:36 -070024#include <linux/time.h>
25#include <linux/init.h>
26#include <linux/kernel.h>
27#include <linux/timer.h>
28#include <linux/seqlock.h>
29#include <linux/jiffies.h>
30#include <linux/sysctl.h>
john stultz7460ed22007-02-16 01:28:21 -080031#include <linux/clocksource.h>
Vojtech Pavlikc08c8202006-09-26 10:52:28 +020032#include <linux/getcpu.h>
Andi Kleen8c131af2006-11-14 16:57:46 +010033#include <linux/cpu.h>
34#include <linux/smp.h>
35#include <linux/notifier.h>
Andy Lutomirski5cec93c2011-06-05 13:50:24 -040036#include <linux/syscalls.h>
37#include <linux/ratelimit.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070038
39#include <asm/vsyscall.h>
40#include <asm/pgtable.h>
41#include <asm/page.h>
john stultz7460ed22007-02-16 01:28:21 -080042#include <asm/unistd.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070043#include <asm/fixmap.h>
44#include <asm/errno.h>
45#include <asm/io.h>
Vojtech Pavlikc08c8202006-09-26 10:52:28 +020046#include <asm/segment.h>
47#include <asm/desc.h>
48#include <asm/topology.h>
Andi Kleen2aae9502007-07-21 17:10:01 +020049#include <asm/vgtod.h>
Andy Lutomirski5cec93c2011-06-05 13:50:24 -040050#include <asm/traps.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070051
Andy Lutomirski8c49d9a2011-05-23 09:31:24 -040052DEFINE_VVAR(int, vgetcpu_mode);
53DEFINE_VVAR(struct vsyscall_gtod_data, vsyscall_gtod_data) =
Linus Torvalds1da177e2005-04-16 15:20:36 -070054{
Eric Dumazetc4dbe542011-05-24 14:08:08 +020055 .lock = __SEQLOCK_UNLOCKED(__vsyscall_gtod_data.lock),
john stultz7460ed22007-02-16 01:28:21 -080056};
Linus Torvalds1da177e2005-04-16 15:20:36 -070057
Tony Breeds2c622142007-10-18 03:04:57 -070058void update_vsyscall_tz(void)
59{
60 unsigned long flags;
61
62 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
63 /* sys_tz has changed */
64 vsyscall_gtod_data.sys_tz = sys_tz;
65 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
66}
67
John Stultz76158562010-07-13 17:56:23 -070068void update_vsyscall(struct timespec *wall_time, struct timespec *wtm,
69 struct clocksource *clock, u32 mult)
john stultz7460ed22007-02-16 01:28:21 -080070{
71 unsigned long flags;
72
73 write_seqlock_irqsave(&vsyscall_gtod_data.lock, flags);
Andy Lutomirski5cec93c2011-06-05 13:50:24 -040074
john stultz7460ed22007-02-16 01:28:21 -080075 /* copy vsyscall data */
Andy Lutomirski5cec93c2011-06-05 13:50:24 -040076 vsyscall_gtod_data.clock.vread = clock->vread;
77 vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
78 vsyscall_gtod_data.clock.mask = clock->mask;
79 vsyscall_gtod_data.clock.mult = mult;
80 vsyscall_gtod_data.clock.shift = clock->shift;
81 vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
82 vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
83 vsyscall_gtod_data.wall_to_monotonic = *wtm;
84 vsyscall_gtod_data.wall_time_coarse = __current_kernel_time();
85
john stultz7460ed22007-02-16 01:28:21 -080086 write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
Linus Torvalds1da177e2005-04-16 15:20:36 -070087}
88
Andy Lutomirski5cec93c2011-06-05 13:50:24 -040089static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
90 const char *message)
91{
92 static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST);
93 struct task_struct *tsk;
94
95 if (!show_unhandled_signals || !__ratelimit(&rs))
96 return;
97
98 tsk = current;
99
100 printk("%s%s[%d] %s ip:%lx sp:%lx ax:%lx si:%lx di:%lx\n",
101 level, tsk->comm, task_pid_nr(tsk),
102 message, regs->ip - 2, regs->sp, regs->ax, regs->si, regs->di);
103}
104
105void dotraplinkage do_emulate_vsyscall(struct pt_regs *regs, long error_code)
106{
107 const char *vsyscall_name;
108 struct task_struct *tsk;
109 unsigned long caller;
110 int vsyscall_nr;
111 long ret;
112
113 /* Kernel code must never get here. */
114 BUG_ON(!user_mode(regs));
115
116 local_irq_enable();
117
118 /*
119 * x86-ism here: regs->ip points to the instruction after the int 0xcc,
120 * and int 0xcc is two bytes long.
121 */
122 if (!is_vsyscall_entry(regs->ip - 2)) {
123 warn_bad_vsyscall(KERN_WARNING, regs, "illegal int 0xcc (exploit attempt?)");
124 goto sigsegv;
125 }
126 vsyscall_nr = vsyscall_entry_nr(regs->ip - 2);
127
128 if (get_user(caller, (unsigned long __user *)regs->sp) != 0) {
129 warn_bad_vsyscall(KERN_WARNING, regs, "int 0xcc with bad stack (exploit attempt?)");
130 goto sigsegv;
131 }
132
133 tsk = current;
134 if (seccomp_mode(&tsk->seccomp))
135 do_exit(SIGKILL);
136
137 switch (vsyscall_nr) {
138 case 0:
139 vsyscall_name = "gettimeofday";
140 ret = sys_gettimeofday(
141 (struct timeval __user *)regs->di,
142 (struct timezone __user *)regs->si);
143 break;
144
145 case 1:
146 vsyscall_name = "time";
147 ret = sys_time((time_t __user *)regs->di);
148 break;
149
150 case 2:
151 vsyscall_name = "getcpu";
152 ret = sys_getcpu((unsigned __user *)regs->di,
153 (unsigned __user *)regs->si,
154 0);
155 break;
156
157 default:
158 /*
159 * If we get here, then vsyscall_nr indicates that int 0xcc
160 * happened at an address in the vsyscall page that doesn't
161 * contain int 0xcc. That can't happen.
162 */
163 BUG();
164 }
165
166 if (ret == -EFAULT) {
167 /*
168 * Bad news -- userspace fed a bad pointer to a vsyscall.
169 *
170 * With a real vsyscall, that would have caused SIGSEGV.
171 * To make writing reliable exploits using the emulated
172 * vsyscalls harder, generate SIGSEGV here as well.
173 */
174 warn_bad_vsyscall(KERN_INFO, regs,
175 "vsyscall fault (exploit attempt?)");
176 goto sigsegv;
177 }
178
179 regs->ax = ret;
180
181 /* Emulate a ret instruction. */
182 regs->ip = caller;
183 regs->sp += 8;
184
185 local_irq_disable();
186 return;
187
188sigsegv:
189 regs->ip -= 2; /* The faulting instruction should be the int 0xcc. */
190 force_sig(SIGSEGV, current);
191}
192
193/*
194 * Assume __initcall executes before all user space. Hopefully kmod
195 * doesn't violate that. We'll find out if it does.
john stultz7460ed22007-02-16 01:28:21 -0800196 */
Andi Kleen8c131af2006-11-14 16:57:46 +0100197static void __cpuinit vsyscall_set_cpu(int cpu)
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200198{
Jeremy Fitzhardingefc8b8a62008-06-25 00:19:01 -0400199 unsigned long d;
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200200 unsigned long node = 0;
201#ifdef CONFIG_NUMA
Mike Travis98c9e272007-10-17 18:04:39 +0200202 node = cpu_to_node(cpu);
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200203#endif
Mike Travis92cb7612007-10-19 20:35:04 +0200204 if (cpu_has(&cpu_data(cpu), X86_FEATURE_RDTSCP))
Andi Kleen8c131af2006-11-14 16:57:46 +0100205 write_rdtscp_aux((node << 12) | cpu);
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200206
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400207 /*
208 * Store cpu number in limit so that it can be loaded quickly
209 * in user space in vgetcpu. (12 bits for the CPU and 8 bits for the node)
210 */
Jeremy Fitzhardingefc8b8a62008-06-25 00:19:01 -0400211 d = 0x0f40000000000ULL;
212 d |= cpu;
213 d |= (node & 0xf) << 12;
214 d |= (node >> 4) << 48;
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400215
Jeremy Fitzhardingefc8b8a62008-06-25 00:19:01 -0400216 write_gdt_entry(get_cpu_gdt_table(cpu), GDT_ENTRY_PER_CPU, &d, DESCTYPE_S);
Vojtech Pavlikc08c8202006-09-26 10:52:28 +0200217}
218
Andi Kleen8c131af2006-11-14 16:57:46 +0100219static void __cpuinit cpu_vsyscall_init(void *arg)
220{
221 /* preemption should be already off */
222 vsyscall_set_cpu(raw_smp_processor_id());
223}
224
225static int __cpuinit
226cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
227{
228 long cpu = (long)arg;
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400229
Rafael J. Wysocki8bb78442007-05-09 02:35:10 -0700230 if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
Jens Axboe8691e5a2008-06-06 11:18:06 +0200231 smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400232
Andi Kleen8c131af2006-11-14 16:57:46 +0100233 return NOTIFY_DONE;
234}
235
Ingo Molnare4026442008-01-30 13:32:39 +0100236void __init map_vsyscall(void)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237{
238 extern char __vsyscall_0;
239 unsigned long physaddr_page0 = __pa_symbol(&__vsyscall_0);
Andy Lutomirski9fd67b42011-06-05 13:50:19 -0400240 extern char __vvar_page;
241 unsigned long physaddr_vvar_page = __pa_symbol(&__vvar_page);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700242
Ernie Petrides103efcd2006-12-07 02:14:09 +0100243 /* Note that VSYSCALL_MAPPED_PAGES must agree with the code below. */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700244 __set_fixmap(VSYSCALL_FIRST_PAGE, physaddr_page0, PAGE_KERNEL_VSYSCALL);
Andy Lutomirski9fd67b42011-06-05 13:50:19 -0400245 __set_fixmap(VVAR_PAGE, physaddr_vvar_page, PAGE_KERNEL_VVAR);
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400246 BUILD_BUG_ON((unsigned long)__fix_to_virt(VVAR_PAGE) != (unsigned long)VVAR_ADDRESS);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700247}
248
249static int __init vsyscall_init(void)
250{
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400251 BUG_ON(VSYSCALL_ADDR(0) != __fix_to_virt(VSYSCALL_FIRST_PAGE));
252
Jens Axboe15c8b6c2008-05-09 09:39:44 +0200253 on_each_cpu(cpu_vsyscall_init, NULL, 1);
Sheng Yangbe43f832009-12-18 16:48:45 +0800254 /* notifier priority > KVM */
255 hotcpu_notifier(cpu_vsyscall_notifier, 30);
Andy Lutomirski5cec93c2011-06-05 13:50:24 -0400256
Linus Torvalds1da177e2005-04-16 15:20:36 -0700257 return 0;
258}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259__initcall(vsyscall_init);