/*
 * Xen SMP support
 *
 * This file implements the Xen versions of smp_ops.  SMP under Xen is
 * very straightforward.  Bringing a CPU up is simply a matter of
 * loading its initial context and setting it running.
 *
 * IPIs are handled through the Xen event mechanism.
 *
 * Because virtual CPUs can be scheduled onto any real CPU, there's no
 * useful topology information for the kernel to make use of.  As a
 * result, all CPUs are treated as if they're single-core and
 * single-threaded.
 *
 * This does not handle HOTPLUG_CPU yet.
 */
#include <linux/sched.h>
#include <linux/err.h>
#include <linux/smp.h>

#include <asm/paravirt.h>
#include <asm/desc.h>
#include <asm/pgtable.h>
#include <asm/cpu.h>

#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include <asm/xen/interface.h>
#include <asm/xen/hypercall.h>

#include <xen/page.h>
#include <xen/events.h>

#include "xen-ops.h"
#include "mmu.h"

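/*
 * cpu_initialized_map records which vcpus have already had their initial
 * context loaded via VCPUOP_initialise.  resched_irq and callfunc_irq hold
 * the per-cpu irq numbers returned by bind_ipi_to_irqhandler() for the
 * reschedule and call-function IPIs.
 */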
static cpumask_t cpu_initialized_map;
static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);

/*
 * Structure and data for smp_call_function(). This is designed to minimise
 * static memory requirements. It also looks cleaner.
 */
static DEFINE_SPINLOCK(call_lock);

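/*
 * call_data describes one in-flight cross-cpu function call: each target
 * CPU bumps 'started' once it has picked up func/info, runs the function,
 * and bumps 'finished' afterwards if the caller asked to wait.
 */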
struct call_data_struct {
	void (*func) (void *info);
	void *info;
	atomic_t started;
	atomic_t finished;
	int wait;
};

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id);

static struct call_data_struct *call_data;

/*
 * Reschedule callback. Nothing to do,
 * all the work is done automatically when
 * we return from the interrupt.
 */
static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
{
	return IRQ_HANDLED;
}

static __cpuinit void cpu_bringup_and_idle(void)
{
	int cpu = smp_processor_id();

	cpu_init();

	preempt_disable();
	per_cpu(cpu_state, cpu) = CPU_ONLINE;

	xen_setup_cpu_clockevents();

	/* We can take interrupts now: we're officially "up". */
	local_irq_enable();

	wmb();			/* make sure everything is out */
	cpu_idle();
}

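/*
 * Bind the reschedule and call-function IPIs for this cpu to Xen event
 * channels and remember the resulting irqs.  Returns 0 on success or a
 * negative errno, unbinding anything already bound on failure.
 */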
static int xen_smp_intr_init(unsigned int cpu)
{
	int rc;
	const char *resched_name, *callfunc_name;

	per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

	resched_name = kasprintf(GFP_KERNEL, "resched%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_RESCHEDULE_VECTOR,
				    cpu,
				    xen_reschedule_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    resched_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(resched_irq, cpu) = rc;

	callfunc_name = kasprintf(GFP_KERNEL, "callfunc%d", cpu);
	rc = bind_ipi_to_irqhandler(XEN_CALL_FUNCTION_VECTOR,
				    cpu,
				    xen_call_function_interrupt,
				    IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				    callfunc_name,
				    NULL);
	if (rc < 0)
		goto fail;
	per_cpu(callfunc_irq, cpu) = rc;

	return 0;

 fail:
	if (per_cpu(resched_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
	if (per_cpu(callfunc_irq, cpu) >= 0)
		unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
	return rc;
}

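/*
 * Probe each potential vcpu with VCPUOP_is_up.  The call only fails
 * (returns a negative error) for vcpus the domain does not have, so any
 * vcpu that answers is marked possible, whether or not it is currently
 * running.
 */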
void __init xen_fill_possible_map(void)
{
	int i, rc;

	for (i = 0; i < NR_CPUS; i++) {
		rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
		if (rc >= 0)
			cpu_set(i, cpu_possible_map);
	}
}

void __init xen_smp_prepare_boot_cpu(void)
{
	int cpu;

	BUG_ON(smp_processor_id() != 0);
	native_smp_prepare_boot_cpu();

	/* We've switched to the "real" per-cpu gdt, so make sure the
	   old memory can be recycled */
	make_lowmem_page_readwrite(&per_cpu__gdt_page);

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map lives in a per cpu area that is cleared
		 * when the per cpu array is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	xen_setup_vcpu_info_placement();
}

void __init xen_smp_prepare_cpus(unsigned int max_cpus)
{
	unsigned cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++) {
		cpus_clear(per_cpu(cpu_sibling_map, cpu));
		/*
		 * cpu_core_map will be zeroed when the per
		 * cpu area is allocated.
		 *
		 * cpus_clear(per_cpu(cpu_core_map, cpu));
		 */
	}

	smp_store_cpu_info(0);
	set_cpu_sibling_map(0);

	if (xen_smp_intr_init(0))
		BUG();

	cpu_initialized_map = cpumask_of_cpu(0);

	/* Restrict the possible_map according to max_cpus. */
	while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
		for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
			continue;
		cpu_clear(cpu, cpu_possible_map);
	}

	for_each_possible_cpu (cpu) {
		struct task_struct *idle;

		if (cpu == 0)
			continue;

		idle = fork_idle(cpu);
		if (IS_ERR(idle))
			panic("failed fork for CPU %d", cpu);

		cpu_set(cpu, cpu_present_map);
	}

	//init_xenbus_allowed_cpumask();
}

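/*
 * Build the initial register state for a secondary vcpu and hand it to
 * Xen: flat segments, eip pointing at cpu_bringup_and_idle, the trap
 * table, GDT frames, kernel stack, event/failsafe callbacks and cr3,
 * all registered with VCPUOP_initialise.  Only done once per cpu,
 * tracked in cpu_initialized_map.
 */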
static __cpuinit int
cpu_initialize_context(unsigned int cpu, struct task_struct *idle)
{
	struct vcpu_guest_context *ctxt;
	struct gdt_page *gdt = &per_cpu(gdt_page, cpu);

	if (cpu_test_and_set(cpu, cpu_initialized_map))
		return 0;

	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
	if (ctxt == NULL)
		return -ENOMEM;

	ctxt->flags = VGCF_IN_KERNEL;
	ctxt->user_regs.ds = __USER_DS;
	ctxt->user_regs.es = __USER_DS;
	ctxt->user_regs.fs = __KERNEL_PERCPU;
	ctxt->user_regs.gs = 0;
	ctxt->user_regs.ss = __KERNEL_DS;
	ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle;
	ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */

	memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

	xen_copy_trap_info(ctxt->trap_ctxt);

	ctxt->ldt_ents = 0;

	BUG_ON((unsigned long)gdt->gdt & ~PAGE_MASK);
	make_lowmem_page_readonly(gdt->gdt);

	ctxt->gdt_frames[0] = virt_to_mfn(gdt->gdt);
	ctxt->gdt_ents      = ARRAY_SIZE(gdt->gdt);

	ctxt->user_regs.cs = __KERNEL_CS;
	ctxt->user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

	ctxt->kernel_ss = __KERNEL_DS;
	ctxt->kernel_sp = idle->thread.esp0;

	ctxt->event_callback_cs     = __KERNEL_CS;
	ctxt->event_callback_eip    = (unsigned long)xen_hypervisor_callback;
	ctxt->failsafe_callback_cs  = __KERNEL_CS;
	ctxt->failsafe_callback_eip = (unsigned long)xen_failsafe_callback;

	per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir);
	ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

	if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, ctxt))
		BUG();

	kfree(ctxt);
	return 0;
}

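/*
 * Bring a secondary cpu online: set up its per-cpu GDT, idle task, irq
 * stacks and timer, mask event delivery, load its initial context, bind
 * its IPI irqs, mark it online and finally ask Xen to run the vcpu with
 * VCPUOP_up.
 */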
int __cpuinit xen_cpu_up(unsigned int cpu)
{
	struct task_struct *idle = idle_task(cpu);
	int rc;

#if 0
	rc = cpu_up_check(cpu);
	if (rc)
		return rc;
#endif

	init_gdt(cpu);
	per_cpu(current_task, cpu) = idle;
	irq_ctx_init(cpu);
	xen_setup_timer(cpu);

	/* make sure interrupts start blocked */
	per_cpu(xen_vcpu, cpu)->evtchn_upcall_mask = 1;

	rc = cpu_initialize_context(cpu, idle);
	if (rc)
		return rc;

	if (num_online_cpus() == 1)
		alternatives_smp_switch(1);

	rc = xen_smp_intr_init(cpu);
	if (rc)
		return rc;

	smp_store_cpu_info(cpu);
	set_cpu_sibling_map(cpu);
	/* This must be done before setting cpu_online_map */
	wmb();

	cpu_set(cpu, cpu_online_map);

	rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
	BUG_ON(rc);

	return 0;
}

void xen_smp_cpus_done(unsigned int max_cpus)
{
}

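/*
 * stop_self() runs on each cpu targeted by xen_smp_send_stop(): it
 * switches to swapper_pg_dir so the vcpu isn't pinning its current
 * pagetable, then takes itself offline with VCPUOP_down and never
 * returns.
 */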
static void stop_self(void *v)
{
	int cpu = smp_processor_id();

	/* make sure we're not pinning something down */
	load_cr3(swapper_pg_dir);
	/* should set up a minimal gdt */

	HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL);
	BUG();
}

void xen_smp_send_stop(void)
{
	smp_call_function(stop_self, NULL, 0, 0);
}

void xen_smp_send_reschedule(int cpu)
{
	xen_send_IPI_one(cpu, XEN_RESCHEDULE_VECTOR);
}


static void xen_send_IPI_mask(cpumask_t mask, enum ipi_vector vector)
{
	unsigned cpu;

	cpus_and(mask, mask, cpu_online_map);

	for_each_cpu_mask(cpu, mask)
		xen_send_IPI_one(cpu, vector);
}

static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
{
	void (*func) (void *info) = call_data->func;
	void *info = call_data->info;
	int wait = call_data->wait;

	/*
	 * Notify initiating CPU that I've grabbed the data and am
	 * about to execute the function
	 */
	mb();
	atomic_inc(&call_data->started);
	/*
	 * At this point the info structure may be out of scope unless wait==1
	 */
	irq_enter();
	(*func)(info);
	irq_exit();

	if (wait) {
		mb();		/* commit everything before setting finished */
		atomic_inc(&call_data->finished);
	}

	return IRQ_HANDLED;
}

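/*
 * Run 'func' on every online cpu in 'mask' except the caller.  call_lock
 * serialises cross calls and keeps cpus from going away; the caller
 * publishes call_data, IPIs the targets, yields so their vcpus get a
 * chance to run, then spins until every target has bumped 'started'
 * (and 'finished', when wait is set).
 */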
int xen_smp_call_function_mask(cpumask_t mask, void (*func)(void *),
			       void *info, int wait)
{
	struct call_data_struct data;
	int cpus;

	/* Holding any lock stops cpus from going down. */
	spin_lock(&call_lock);

	cpu_clear(smp_processor_id(), mask);

	cpus = cpus_weight(mask);
	if (!cpus) {
		spin_unlock(&call_lock);
		return 0;
	}

	/* Can deadlock when called with interrupts disabled */
	WARN_ON(irqs_disabled());

	data.func = func;
	data.info = info;
	atomic_set(&data.started, 0);
	data.wait = wait;
	if (wait)
		atomic_set(&data.finished, 0);

	call_data = &data;
	mb();			/* write everything before IPI */

	/* Send a message to other CPUs and wait for them to respond */
	xen_send_IPI_mask(mask, XEN_CALL_FUNCTION_VECTOR);

	/* Make sure other vcpus get a chance to run.
	   XXX too severe?  Maybe we should check the other CPU's states? */
	HYPERVISOR_sched_op(SCHEDOP_yield, 0);

	/* Wait for response */
	while (atomic_read(&data.started) != cpus ||
	       (wait && atomic_read(&data.finished) != cpus))
		cpu_relax();

	spin_unlock(&call_lock);

	return 0;
}