| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 1 | /* | 
|  | 2 | * Context tracking: Probe on high level context boundaries such as kernel | 
|  | 3 | * and userspace. This includes syscalls and exceptions entry/exit. | 
|  | 4 | * | 
|  | 5 | * This is used by RCU to remove its dependency on the timer tick while a CPU | 
|  | 6 | * runs in userspace. | 
|  | 7 | * | 
|  | 8 | *  Started by Frederic Weisbecker: | 
|  | 9 | * | 
|  | 10 | * Copyright (C) 2012 Red Hat, Inc., Frederic Weisbecker <fweisbec@redhat.com> | 
|  | 11 | * | 
|  | 12 | * Many thanks to Gilad Ben-Yossef, Paul McKenney, Ingo Molnar, Andrew Morton, | 
|  | 13 | * Steven Rostedt, Peter Zijlstra for suggestions and improvements. | 
|  | 14 | * | 
|  | 15 | */ | 
|  | 16 |  | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 17 | #include <linux/context_tracking.h> | 
| Frederic Weisbecker | 6a61671 | 2012-12-16 20:00:34 +0100 | [diff] [blame] | 18 | #include <linux/kvm_host.h> | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 19 | #include <linux/rcupdate.h> | 
|  | 20 | #include <linux/sched.h> | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 21 | #include <linux/hardirq.h> | 
| Frederic Weisbecker | 6a61671 | 2012-12-16 20:00:34 +0100 | [diff] [blame] | 22 | #include <linux/export.h> | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 23 |  | 
| Frederic Weisbecker | 95a79fd | 2013-01-07 18:12:14 +0100 | [diff] [blame] | 24 | DEFINE_PER_CPU(struct context_tracking, context_tracking) = { | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 25 | #ifdef CONFIG_CONTEXT_TRACKING_FORCE | 
|  | 26 | .active = true, | 
|  | 27 | #endif | 
|  | 28 | }; | 
|  | 29 |  | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 30 | /** | 
|  | 31 | * user_enter - Inform the context tracking that the CPU is going to | 
|  | 32 | *              enter userspace mode. | 
|  | 33 | * | 
|  | 34 | * This function must be called right before we switch from the kernel | 
|  | 35 | * to userspace, when it's guaranteed the remaining kernel instructions | 
|  | 36 | * to execute won't use any RCU read side critical section because this | 
|  | 37 | * function sets RCU in extended quiescent state. | 
|  | 38 | */ | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 39 | void user_enter(void) | 
|  | 40 | { | 
|  | 41 | unsigned long flags; | 
|  | 42 |  | 
|  | 43 | /* | 
|  | 44 | * Some contexts may involve an exception occuring in an irq, | 
|  | 45 | * leading to that nesting: | 
|  | 46 | * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit() | 
|  | 47 | * This would mess up the dyntick_nesting count though. And rcu_irq_*() | 
|  | 48 | * helpers are enough to protect RCU uses inside the exception. So | 
|  | 49 | * just return immediately if we detect we are in an IRQ. | 
|  | 50 | */ | 
|  | 51 | if (in_interrupt()) | 
|  | 52 | return; | 
|  | 53 |  | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 54 | /* Kernel threads aren't supposed to go to userspace */ | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 55 | WARN_ON_ONCE(!current->mm); | 
|  | 56 |  | 
|  | 57 | local_irq_save(flags); | 
|  | 58 | if (__this_cpu_read(context_tracking.active) && | 
|  | 59 | __this_cpu_read(context_tracking.state) != IN_USER) { | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 60 | /* | 
|  | 61 | * At this stage, only low level arch entry code remains and | 
|  | 62 | * then we'll run in userspace. We can assume there won't be | 
|  | 63 | * any RCU read-side critical section until the next call to | 
|  | 64 | * user_exit() or rcu_irq_enter(). Let's remove RCU's dependency | 
|  | 65 | * on the tick. | 
|  | 66 | */ | 
| Frederic Weisbecker | abf917c | 2012-07-25 07:56:04 +0200 | [diff] [blame] | 67 | vtime_user_enter(current); | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 68 | rcu_user_enter(); | 
| Frederic Weisbecker | abf917c | 2012-07-25 07:56:04 +0200 | [diff] [blame] | 69 | __this_cpu_write(context_tracking.state, IN_USER); | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 70 | } | 
|  | 71 | local_irq_restore(flags); | 
|  | 72 | } | 
|  | 73 |  | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 74 |  | 
|  | 75 | /** | 
|  | 76 | * user_exit - Inform the context tracking that the CPU is | 
|  | 77 | *             exiting userspace mode and entering the kernel. | 
|  | 78 | * | 
|  | 79 | * This function must be called after we entered the kernel from userspace | 
|  | 80 | * before any use of RCU read side critical section. This potentially include | 
|  | 81 | * any high level kernel code like syscalls, exceptions, signal handling, etc... | 
|  | 82 | * | 
|  | 83 | * This call supports re-entrancy. This way it can be called from any exception | 
|  | 84 | * handler without needing to know if we came from userspace or not. | 
|  | 85 | */ | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 86 | void user_exit(void) | 
|  | 87 | { | 
|  | 88 | unsigned long flags; | 
|  | 89 |  | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 90 | if (in_interrupt()) | 
|  | 91 | return; | 
|  | 92 |  | 
|  | 93 | local_irq_save(flags); | 
|  | 94 | if (__this_cpu_read(context_tracking.state) == IN_USER) { | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 95 | /* | 
|  | 96 | * We are going to run code that may use RCU. Inform | 
|  | 97 | * RCU core about that (ie: we may need the tick again). | 
|  | 98 | */ | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 99 | rcu_user_exit(); | 
| Frederic Weisbecker | abf917c | 2012-07-25 07:56:04 +0200 | [diff] [blame] | 100 | vtime_user_exit(current); | 
|  | 101 | __this_cpu_write(context_tracking.state, IN_KERNEL); | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 102 | } | 
|  | 103 | local_irq_restore(flags); | 
|  | 104 | } | 
|  | 105 |  | 
| Frederic Weisbecker | 6a61671 | 2012-12-16 20:00:34 +0100 | [diff] [blame] | 106 | void guest_enter(void) | 
|  | 107 | { | 
|  | 108 | if (vtime_accounting_enabled()) | 
|  | 109 | vtime_guest_enter(current); | 
|  | 110 | else | 
|  | 111 | __guest_enter(); | 
|  | 112 | } | 
|  | 113 | EXPORT_SYMBOL_GPL(guest_enter); | 
|  | 114 |  | 
|  | 115 | void guest_exit(void) | 
|  | 116 | { | 
|  | 117 | if (vtime_accounting_enabled()) | 
|  | 118 | vtime_guest_exit(current); | 
|  | 119 | else | 
|  | 120 | __guest_exit(); | 
|  | 121 | } | 
|  | 122 | EXPORT_SYMBOL_GPL(guest_exit); | 
|  | 123 |  | 
| Frederic Weisbecker | 4eacdf1 | 2013-01-16 17:16:37 +0100 | [diff] [blame] | 124 |  | 
|  | 125 | /** | 
|  | 126 | * context_tracking_task_switch - context switch the syscall callbacks | 
|  | 127 | * @prev: the task that is being switched out | 
|  | 128 | * @next: the task that is being switched in | 
|  | 129 | * | 
|  | 130 | * The context tracking uses the syscall slow path to implement its user-kernel | 
|  | 131 | * boundaries probes on syscalls. This way it doesn't impact the syscall fast | 
|  | 132 | * path on CPUs that don't do context tracking. | 
|  | 133 | * | 
|  | 134 | * But we need to clear the flag on the previous task because it may later | 
|  | 135 | * migrate to some CPU that doesn't do the context tracking. As such the TIF | 
|  | 136 | * flag may not be desired there. | 
|  | 137 | */ | 
| Frederic Weisbecker | 91d1aa43 | 2012-11-27 19:33:25 +0100 | [diff] [blame] | 138 | void context_tracking_task_switch(struct task_struct *prev, | 
|  | 139 | struct task_struct *next) | 
|  | 140 | { | 
|  | 141 | if (__this_cpu_read(context_tracking.active)) { | 
|  | 142 | clear_tsk_thread_flag(prev, TIF_NOHZ); | 
|  | 143 | set_tsk_thread_flag(next, TIF_NOHZ); | 
|  | 144 | } | 
|  | 145 | } |