| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 1 | /****************************************************************************** | 
|  | 2 | * arch/ia64/xen/time.c | 
|  | 3 | * | 
|  | 4 | * Copyright (c) 2008 Isaku Yamahata <yamahata at valinux co jp> | 
|  | 5 | *                    VA Linux Systems Japan K.K. | 
|  | 6 | * | 
|  | 7 | * This program is free software; you can redistribute it and/or modify | 
|  | 8 | * it under the terms of the GNU General Public License as published by | 
|  | 9 | * the Free Software Foundation; either version 2 of the License, or | 
|  | 10 | * (at your option) any later version. | 
|  | 11 | * | 
|  | 12 | * This program is distributed in the hope that it will be useful, | 
|  | 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | 
|  | 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the | 
|  | 15 | * GNU General Public License for more details. | 
|  | 16 | * | 
|  | 17 | * You should have received a copy of the GNU General Public License | 
|  | 18 | * along with this program; if not, write to the Free Software | 
|  | 19 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA | 
|  | 20 | * | 
|  | 21 | */ | 
|  | 22 |  | 
|  | 23 | #include <linux/delay.h> | 
|  | 24 | #include <linux/kernel_stat.h> | 
|  | 25 | #include <linux/posix-timers.h> | 
|  | 26 | #include <linux/irq.h> | 
|  | 27 | #include <linux/clocksource.h> | 
|  | 28 |  | 
| Isaku Yamahata | 91834e6 | 2008-10-17 11:18:10 +0900 | [diff] [blame] | 29 | #include <asm/timex.h> | 
|  | 30 |  | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 31 | #include <asm/xen/hypervisor.h> | 
|  | 32 |  | 
|  | 33 | #include <xen/interface/vcpu.h> | 
|  | 34 |  | 
|  | 35 | #include "../kernel/fsyscall_gtod_data.h" | 
|  | 36 |  | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 37 | static DEFINE_PER_CPU(struct vcpu_runstate_info, xen_runstate); | 
|  | 38 | static DEFINE_PER_CPU(unsigned long, xen_stolen_time); | 
|  | 39 | static DEFINE_PER_CPU(unsigned long, xen_blocked_time); | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 40 |  | 
|  | 41 | /* taken from i386/kernel/time-xen.c */ | 
|  | 42 | static void xen_init_missing_ticks_accounting(int cpu) | 
|  | 43 | { | 
|  | 44 | struct vcpu_register_runstate_memory_area area; | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 45 | struct vcpu_runstate_info *runstate = &per_cpu(xen_runstate, cpu); | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 46 | int rc; | 
|  | 47 |  | 
|  | 48 | memset(runstate, 0, sizeof(*runstate)); | 
|  | 49 |  | 
|  | 50 | area.addr.v = runstate; | 
|  | 51 | rc = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, cpu, | 
|  | 52 | &area); | 
|  | 53 | WARN_ON(rc && rc != -ENOSYS); | 
|  | 54 |  | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 55 | per_cpu(xen_blocked_time, cpu) = runstate->time[RUNSTATE_blocked]; | 
|  | 56 | per_cpu(xen_stolen_time, cpu) = runstate->time[RUNSTATE_runnable] | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 57 | + runstate->time[RUNSTATE_offline]; | 
|  | 58 | } | 
|  | 59 |  | 
|  | 60 | /* | 
|  | 61 | * Runstate accounting | 
|  | 62 | */ | 
|  | 63 | /* stolen from arch/x86/xen/time.c */ | 
|  | 64 | static void get_runstate_snapshot(struct vcpu_runstate_info *res) | 
|  | 65 | { | 
|  | 66 | u64 state_time; | 
|  | 67 | struct vcpu_runstate_info *state; | 
|  | 68 |  | 
|  | 69 | BUG_ON(preemptible()); | 
|  | 70 |  | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 71 | state = &__get_cpu_var(xen_runstate); | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 72 |  | 
|  | 73 | /* | 
|  | 74 | * The runstate info is always updated by the hypervisor on | 
|  | 75 | * the current CPU, so there's no need to use anything | 
|  | 76 | * stronger than a compiler barrier when fetching it. | 
|  | 77 | */ | 
|  | 78 | do { | 
|  | 79 | state_time = state->state_entry_time; | 
|  | 80 | rmb(); | 
|  | 81 | *res = *state; | 
|  | 82 | rmb(); | 
|  | 83 | } while (state->state_entry_time != state_time); | 
|  | 84 | } | 
|  | 85 |  | 
|  | 86 | #define NS_PER_TICK (1000000000LL/HZ) | 
|  | 87 |  | 
|  | 88 | static unsigned long | 
|  | 89 | consider_steal_time(unsigned long new_itm) | 
|  | 90 | { | 
|  | 91 | unsigned long stolen, blocked; | 
|  | 92 | unsigned long delta_itm = 0, stolentick = 0; | 
|  | 93 | int cpu = smp_processor_id(); | 
|  | 94 | struct vcpu_runstate_info runstate; | 
|  | 95 | struct task_struct *p = current; | 
|  | 96 |  | 
|  | 97 | get_runstate_snapshot(&runstate); | 
|  | 98 |  | 
|  | 99 | /* | 
|  | 100 | * Check for vcpu migration effect | 
|  | 101 | * In this case, itc value is reversed. | 
|  | 102 | * This causes huge stolen value. | 
|  | 103 | * This function just checks and reject this effect. | 
|  | 104 | */ | 
|  | 105 | if (!time_after_eq(runstate.time[RUNSTATE_blocked], | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 106 | per_cpu(xen_blocked_time, cpu))) | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 107 | blocked = 0; | 
|  | 108 |  | 
|  | 109 | if (!time_after_eq(runstate.time[RUNSTATE_runnable] + | 
|  | 110 | runstate.time[RUNSTATE_offline], | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 111 | per_cpu(xen_stolen_time, cpu))) | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 112 | stolen = 0; | 
|  | 113 |  | 
|  | 114 | if (!time_after(delta_itm + new_itm, ia64_get_itc())) | 
|  | 115 | stolentick = ia64_get_itc() - new_itm; | 
|  | 116 |  | 
|  | 117 | do_div(stolentick, NS_PER_TICK); | 
|  | 118 | stolentick++; | 
|  | 119 |  | 
|  | 120 | do_div(stolen, NS_PER_TICK); | 
|  | 121 |  | 
|  | 122 | if (stolen > stolentick) | 
|  | 123 | stolen = stolentick; | 
|  | 124 |  | 
|  | 125 | stolentick -= stolen; | 
|  | 126 | do_div(blocked, NS_PER_TICK); | 
|  | 127 |  | 
|  | 128 | if (blocked > stolentick) | 
|  | 129 | blocked = stolentick; | 
|  | 130 |  | 
|  | 131 | if (stolen > 0 || blocked > 0) { | 
| Isaku Yamahata | 7a0b6e0 | 2009-01-15 15:16:55 +0900 | [diff] [blame] | 132 | account_steal_ticks(stolen); | 
|  | 133 | account_idle_ticks(blocked); | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 134 | run_local_timers(); | 
|  | 135 |  | 
| Paul E. McKenney | a157229 | 2009-08-22 13:56:51 -0700 | [diff] [blame] | 136 | rcu_check_callbacks(cpu, user_mode(get_irq_regs())); | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 137 |  | 
|  | 138 | scheduler_tick(); | 
|  | 139 | run_posix_cpu_timers(p); | 
|  | 140 | delta_itm += local_cpu_data->itm_delta * (stolen + blocked); | 
|  | 141 |  | 
| Torben Hohn | 1aabd67 | 2011-01-27 15:59:56 +0100 | [diff] [blame] | 142 | if (cpu == time_keeper_id) | 
|  | 143 | xtime_update(stolen + blocked); | 
|  | 144 |  | 
|  | 145 | local_cpu_data->itm_next = delta_itm + new_itm; | 
|  | 146 |  | 
| Tejun Heo | c6e22f9 | 2009-10-29 22:34:13 +0900 | [diff] [blame] | 147 | per_cpu(xen_stolen_time, cpu) += NS_PER_TICK * stolen; | 
|  | 148 | per_cpu(xen_blocked_time, cpu) += NS_PER_TICK * blocked; | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 149 | } | 
|  | 150 | return delta_itm; | 
|  | 151 | } | 
|  | 152 |  | 
/*
 * Run steal-time accounting and advance *new_itm by the result.
 *
 * Returns 1 when the timer value was moved forward and now lies after the
 * current ITC, 0 otherwise.
 */
static int xen_do_steal_accounting(unsigned long *new_itm)
{
	unsigned long advance = consider_steal_time(*new_itm);

	*new_itm += advance;

	return (time_after(*new_itm, ia64_get_itc()) && advance) ? 1 : 0;
}
|  | 163 |  | 
|  | 164 | static void xen_itc_jitter_data_reset(void) | 
|  | 165 | { | 
|  | 166 | u64 lcycle, ret; | 
|  | 167 |  | 
|  | 168 | do { | 
|  | 169 | lcycle = itc_jitter_data.itc_lastcycle; | 
|  | 170 | ret = cmpxchg(&itc_jitter_data.itc_lastcycle, lcycle, 0); | 
|  | 171 | } while (unlikely(ret != lcycle)); | 
|  | 172 | } | 
|  | 173 |  | 
| Isaku Yamahata | 1aec1c5 | 2009-03-04 21:05:41 +0900 | [diff] [blame] | 174 | /* based on xen_sched_clock() in arch/x86/xen/time.c. */ | 
|  | 175 | /* | 
|  | 176 | * This relies on HAVE_UNSTABLE_SCHED_CLOCK. If it can't be defined, | 
|  | 177 | * something similar logic should be implemented here. | 
|  | 178 | */ | 
|  | 179 | /* | 
|  | 180 | * Xen sched_clock implementation.  Returns the number of unstolen | 
|  | 181 | * nanoseconds, which is nanoseconds the VCPU spent in RUNNING+BLOCKED | 
|  | 182 | * states. | 
|  | 183 | */ | 
|  | 184 | static unsigned long long xen_sched_clock(void) | 
|  | 185 | { | 
|  | 186 | struct vcpu_runstate_info runstate; | 
|  | 187 |  | 
|  | 188 | unsigned long long now; | 
|  | 189 | unsigned long long offset; | 
|  | 190 | unsigned long long ret; | 
|  | 191 |  | 
|  | 192 | /* | 
|  | 193 | * Ideally sched_clock should be called on a per-cpu basis | 
|  | 194 | * anyway, so preempt should already be disabled, but that's | 
|  | 195 | * not current practice at the moment. | 
|  | 196 | */ | 
|  | 197 | preempt_disable(); | 
|  | 198 |  | 
|  | 199 | /* | 
|  | 200 | * both ia64_native_sched_clock() and xen's runstate are | 
|  | 201 | * based on mAR.ITC. So difference of them makes sense. | 
|  | 202 | */ | 
|  | 203 | now = ia64_native_sched_clock(); | 
|  | 204 |  | 
|  | 205 | get_runstate_snapshot(&runstate); | 
|  | 206 |  | 
|  | 207 | WARN_ON(runstate.state != RUNSTATE_running); | 
|  | 208 |  | 
|  | 209 | offset = 0; | 
|  | 210 | if (now > runstate.state_entry_time) | 
|  | 211 | offset = now - runstate.state_entry_time; | 
|  | 212 | ret = runstate.time[RUNSTATE_blocked] + | 
|  | 213 | runstate.time[RUNSTATE_running] + | 
|  | 214 | offset; | 
|  | 215 |  | 
|  | 216 | preempt_enable(); | 
|  | 217 |  | 
|  | 218 | return ret; | 
|  | 219 | } | 
|  | 220 |  | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 221 | struct pv_time_ops xen_time_ops __initdata = { | 
|  | 222 | .init_missing_ticks_accounting	= xen_init_missing_ticks_accounting, | 
|  | 223 | .do_steal_accounting		= xen_do_steal_accounting, | 
|  | 224 | .clocksource_resume		= xen_itc_jitter_data_reset, | 
| Isaku Yamahata | 1aec1c5 | 2009-03-04 21:05:41 +0900 | [diff] [blame] | 225 | .sched_clock			= xen_sched_clock, | 
| Isaku Yamahata | dcbbecd | 2008-10-17 11:18:08 +0900 | [diff] [blame] | 226 | }; | 
| Isaku Yamahata | 91834e6 | 2008-10-17 11:18:10 +0900 | [diff] [blame] | 227 |  | 
/*
 * Called after suspend to get time going again on the calling cpu:
 * trigger a local tick, then touch the softlockup watchdog.
 */
static void xen_local_tick_resume(void)
{
	ia64_cpu_local_tick();
	touch_softlockup_watchdog();
}
|  | 235 |  | 
|  | 236 | void | 
|  | 237 | xen_timer_resume(void) | 
|  | 238 | { | 
|  | 239 | unsigned int cpu; | 
|  | 240 |  | 
|  | 241 | xen_local_tick_resume(); | 
|  | 242 |  | 
|  | 243 | for_each_online_cpu(cpu) | 
|  | 244 | xen_init_missing_ticks_accounting(cpu); | 
|  | 245 | } | 
|  | 246 |  | 
/*
 * smp_call_function() callback: resume the local tick and re-initialise
 * the missing ticks accounting on the cpu it runs on.  The argument is
 * unused.
 */
static void ia64_cpu_local_tick_fn(void *unused)
{
	int this_cpu;

	xen_local_tick_resume();

	this_cpu = smp_processor_id();
	xen_init_missing_ticks_accounting(this_cpu);
}
|  | 252 |  | 
|  | 253 | void | 
|  | 254 | xen_timer_resume_on_aps(void) | 
|  | 255 | { | 
|  | 256 | smp_call_function(&ia64_cpu_local_tick_fn, NULL, 1); | 
|  | 257 | } |