/*
 * Xen SMP booting functions
 *
 * See arch/i386/kernel/smpboot.c for copyright and credits for derived
 * portions of this file.
 */
8 #include <linux/module.h>
9 #include <linux/init.h>
10 #include <linux/kernel.h>
12 #include <linux/sched.h>
13 #include <linux/kernel_stat.h>
14 #include <linux/smp_lock.h>
15 #include <linux/irq.h>
16 #include <linux/bootmem.h>
17 #include <linux/notifier.h>
18 #include <linux/cpu.h>
19 #include <linux/percpu.h>
21 #include <asm/arch_hooks.h>
22 #include <asm/pgalloc.h>
23 #include <xen/evtchn.h>
24 #include <xen/interface/vcpu.h>
25 #include <xen/cpu_hotplug.h>
26 #include <xen/xenbus.h>
/* IPI handlers implemented in the arch SMP code; bound per-CPU in
 * xen_smp_intr_init() below. */
28 extern irqreturn_t smp_reschedule_interrupt(int, void *);
29 extern irqreturn_t smp_call_function_interrupt(int, void *);
/* Per-CPU local timer bring-up/teardown (called from the intr init/exit
 * helpers below; only for secondary CPUs on the init side). */
31 extern int local_setup_timer(unsigned int cpu);
32 extern void local_teardown_timer(unsigned int cpu);
/* Assembly entry points registered with Xen when building a new vcpu
 * context (see cpu_initialize_context()). */
34 extern void hypervisor_callback(void);
35 extern void failsafe_callback(void);
36 extern void system_call(void);
/* Fills in a vcpu's trap table (used for ctxt.trap_ctxt below). */
37 extern void smp_trap_init(trap_info_t *);
39 /* Number of siblings per CPU package */
40 int smp_num_siblings = 1;
/* Global CPU masks, exported for modules. */
42 cpumask_t cpu_online_map;
43 EXPORT_SYMBOL(cpu_online_map);
44 cpumask_t cpu_possible_map;
45 EXPORT_SYMBOL(cpu_possible_map);
/* CPUs whose vcpu context has already been handed to the hypervisor via
 * VCPUOP_initialise (tested-and-set in cpu_initialize_context()). */
46 cpumask_t cpu_initialized_map;
48 DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
49 EXPORT_PER_CPU_SYMBOL(cpu_info);
51 #ifdef CONFIG_HOTPLUG_CPU
52 DEFINE_PER_CPU(int, cpu_state) = { 0 };
/* Irq numbers returned by bind_ipi_to_irqhandler(); -1 means "not
 * bound", which the failure path in xen_smp_intr_init() relies on. */
55 static DEFINE_PER_CPU(int, resched_irq);
56 static DEFINE_PER_CPU(int, callfunc_irq);
/* Device-name storage handed to bind_ipi_to_irqhandler(); static so the
 * strings outlive the irq binding. */
57 static char resched_name[NR_CPUS][15];
58 static char callfunc_name[NR_CPUS][15];
60 u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };
62 DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
63 DEFINE_PER_CPU(cpumask_t, cpu_core_map);
64 EXPORT_PER_CPU_SYMBOL(cpu_core_map);
67 DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
68 EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
/*
 * Probe each vcpu id with VCPUOP_is_up and mark responsive ids in
 * cpu_possible_map.
 * NOTE(review): several original lines (72-74, 77-78, 81, 83+) are
 * missing from this extraction -- locals, braces and the test on rc.
 */
71 void __init prefill_possible_map(void)
/* presumably: bail out early if some CPU besides the boot CPU is
 * already marked possible -- TODO confirm against full source */
75 for_each_possible_cpu(i)
76 if (i != smp_processor_id())
79 for (i = 0; i < NR_CPUS; i++) {
/* Ask Xen whether vcpu i exists/is up. */
80 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
/* (the condition on rc, original line 81, is missing here) */
82 cpu_set(i, cpu_possible_map);
/* Declaration line only in this extraction; the body (original lines
 * 87+) is missing. */
86 void __init smp_alloc_memory(void)
/*
 * Record trivial CPU topology for @cpu: each vcpu is described as its
 * own single-core package, so the sibling and core masks contain only
 * @cpu itself.  (The return type/qualifiers on original line 90 are
 * missing from this extraction.)
 */
91 set_cpu_sibling_map(unsigned int cpu)
93 cpu_data(cpu).phys_proc_id = cpu;
94 cpu_data(cpu).cpu_core_id = 0;
96 per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu);
97 per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu);
99 cpu_data(cpu).booted_cores = 1;
/*
 * Inverse of set_cpu_sibling_map(): reset @cpu's topology fields and
 * clear its sibling/core masks when the CPU goes away.
 */
103 remove_siblinginfo(unsigned int cpu)
105 cpu_data(cpu).phys_proc_id = BAD_APICID;
106 cpu_data(cpu).cpu_core_id = BAD_APICID;
108 cpus_clear(per_cpu(cpu_sibling_map, cpu));
109 cpus_clear(per_cpu(cpu_core_map, cpu));
111 cpu_data(cpu).booted_cores = 0;
/*
 * Bind the reschedule and call-function IPIs for @cpu and, for
 * secondary CPUs, set up the local timer.  On failure, unbind whatever
 * was already bound (cleanup tail below).
 * NOTE(review): the remaining arguments of the two bind calls, the
 * error-checks/labels and the return statements are missing from this
 * extraction.
 */
114 static int __cpuinit xen_smp_intr_init(unsigned int cpu)
/* Start both irq slots at -1 ("unbound") so the cleanup path can test
 * them with >= 0 safely. */
118 per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;
120 sprintf(resched_name[cpu], "resched%u", cpu);
121 rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
123 smp_reschedule_interrupt,
129 per_cpu(resched_irq, cpu) = rc;
131 sprintf(callfunc_name[cpu], "callfunc%u", cpu);
132 rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
134 smp_call_function_interrupt,
140 per_cpu(callfunc_irq, cpu) = rc;
/* Only secondary CPUs get a local timer set up here; CPU0's timer is
 * presumably handled by the regular boot path -- TODO confirm. */
142 if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
/* Failure cleanup: unbind any IPI that was successfully bound above. */
148 if (per_cpu(resched_irq, cpu) >= 0)
149 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
150 if (per_cpu(callfunc_irq, cpu) >= 0)
151 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
155 #ifdef CONFIG_HOTPLUG_CPU
/*
 * Tear down what xen_smp_intr_init() set up for @cpu: the local timer,
 * then the two IPI bindings.  (Original lines 157-158 and 160 are
 * missing here; they likely contain braces and a CPU0 guard around the
 * timer teardown, mirroring the init side -- confirm against full
 * source.)
 */
156 static void xen_smp_intr_exit(unsigned int cpu)
159 local_teardown_timer(cpu);
161 unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
162 unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
/*
 * Bring-up hook run on a secondary CPU: identify the CPU and reset the
 * softlockup watchdog timestamp.
 *
 * FIX(review): the original text read "identify_secondary_cpu(¤t_cpu_data)".
 * "¤" is mojibake -- the literal "&current_cpu_data" had its leading
 * "&curren" swallowed by an HTML-entity pass (the entity "curren" renders
 * as U+00A4 "¤").  Restored to "&current_cpu_data" in both calls below.
 *
 * NOTE(review): original lines 167-169, 171, 173 and 175+ are missing
 * from this extraction (braces, earlier setup, and the preprocessor
 * conditional selecting between the two identify calls -- i386 versus
 * x86_64, cf. the "#else" x86_64 fragment later in this file).
 */
166 void __cpuinit cpu_bringup(void)
170 identify_secondary_cpu(&current_cpu_data);
172 identify_cpu(&current_cpu_data);
174 touch_softlockup_watchdog();
/* Entry point a new vcpu starts executing at (ctxt.user_regs.eip is set
 * to this in cpu_initialize_context()).  Declaration line only; the
 * body (original lines 180+) is missing from this extraction. */
179 static void __cpuinit cpu_bringup_and_idle(void)
/*
 * Build the initial vcpu_guest_context_t for @cpu and hand it to the
 * hypervisor via VCPUOP_initialise.  Runs at most once per CPU:
 * cpu_initialized_map gates re-entry.
 * NOTE(review): this extraction is missing a number of original lines
 * (braces, the #ifdef matching the "#else" x86_64 branch below, and the
 * error handling after the hypercall).
 */
185 static void __cpuinit cpu_initialize_context(unsigned int cpu)
187 /* vcpu_guest_context_t is too large to allocate on the stack.
188 * Hence we allocate statically and protect it with a lock */
189 static vcpu_guest_context_t ctxt;
190 static DEFINE_SPINLOCK(ctxt_lock);
192 struct task_struct *idle = idle_task(cpu);
/* Already initialised?  Nothing to do. */
194 if (cpu_test_and_set(cpu, cpu_initialized_map))
/* Serialise use of the shared static ctxt. */
197 spin_lock(&ctxt_lock);
199 memset(&ctxt, 0, sizeof(ctxt));
/* The vcpu starts in kernel mode with flat user data segments. */
201 ctxt.flags = VGCF_IN_KERNEL;
202 ctxt.user_regs.ds = __USER_DS;
203 ctxt.user_regs.es = __USER_DS;
204 ctxt.user_regs.fs = 0;
205 ctxt.user_regs.gs = 0;
206 ctxt.user_regs.ss = __KERNEL_DS;
/* Execution begins at cpu_bringup_and_idle() with interrupts enabled. */
207 ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
208 ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
210 memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));
/* Trap/IDT table is filled in by arch code. */
212 smp_trap_init(ctxt.trap_ctxt);
/* GDT described to Xen as entry count plus machine frame number(s). */
215 ctxt.gdt_ents = GDT_SIZE / 8;
/* i386 branch (its #ifdef line is missing from this extraction). */
218 ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));
220 ctxt.user_regs.cs = __KERNEL_CS;
/* Initial stack sits just below the idle task's kernel stack top. */
221 ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);
223 ctxt.kernel_ss = __KERNEL_DS;
224 ctxt.kernel_sp = idle->thread.esp0;
226 ctxt.user_regs.fs = __KERNEL_PERCPU;
/* Event (interrupt) and failsafe callbacks Xen will invoke. */
228 ctxt.event_callback_cs = __KERNEL_CS;
229 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
230 ctxt.failsafe_callback_cs = __KERNEL_CS;
231 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
/* cr3 initially points at the boot page directory (swapper_pg_dir). */
233 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
234 #else /* __x86_64__ */
235 ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);
237 ctxt.user_regs.cs = __KERNEL_CS;
238 ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);
240 ctxt.kernel_ss = __KERNEL_DS;
241 ctxt.kernel_sp = idle->thread.rsp0;
243 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
244 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
/* 64-bit guests additionally register the syscall entry with Xen. */
245 ctxt.syscall_callback_eip = (unsigned long)system_call;
247 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
/* Kernel GS base points at this CPU's PDA. */
249 ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
/* Hand the completed context to the hypervisor; the handling of a
 * failure (original lines 253-254) is missing from this extraction. */
252 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
255 spin_unlock(&ctxt_lock);
/*
 * Prepare for SMP boot: record boot-CPU data, clear all topology masks,
 * bind CPU0's IPIs, clamp cpu_possible_map to @max_cpus, then for every
 * possible CPU fork an idle task, build a per-CPU GDT, and populate the
 * per-CPU identity/bookkeeping state.
 * NOTE(review): many original lines (braces, error paths, some #ifdef
 * markers) are missing from this extraction.
 */
258 void __init smp_prepare_cpus(unsigned int max_cpus)
261 struct task_struct *idle;
263 struct desc_ptr *gdt_descr;
/* CPU0 is the boot CPU: seed its cpu_data and apicid mappings. */
267 boot_cpu_data.apicid = 0;
268 cpu_data(0) = boot_cpu_data;
270 cpu_2_logical_apicid[0] = 0;
271 per_cpu(x86_cpu_to_apicid, 0) = 0;
273 current_thread_info()->cpu = 0;
/* Start from empty sibling/core masks for every slot. */
275 for (cpu = 0; cpu < NR_CPUS; cpu++) {
276 cpus_clear(per_cpu(cpu_sibling_map, cpu));
277 cpus_clear(per_cpu(cpu_core_map, cpu));
280 set_cpu_sibling_map(0);
/* Bind CPU0's IPIs; the failure handling (original 283-284) is missing
 * from this extraction. */
282 if (xen_smp_intr_init(0))
/* Only the boot CPU's vcpu context exists so far. */
285 cpu_initialized_map = cpumask_of_cpu(0);
287 /* Restrict the possible_map according to max_cpus. */
288 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
/* Drop the highest-numbered possible CPU each iteration. */
289 for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
291 cpu_clear(cpu, cpu_possible_map);
294 for_each_possible_cpu (cpu) {
/* Idle task for this CPU; fork failure is fatal at boot. */
298 idle = fork_idle(cpu);
300 panic("failed fork for CPU %d", cpu);
/* Allocate and populate a per-CPU GDT copy (this appears to be the
 * x86_64 side; the i386 alternative uses get_cpu_gdt_table below). */
303 gdt_descr = &cpu_gdt_descr[cpu];
304 gdt_descr->address = get_zeroed_page(GFP_KERNEL);
305 if (unlikely(!gdt_descr->address)) {
306 printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
310 gdt_descr->size = GDT_SIZE;
311 memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
312 gdt_addr = (void *)gdt_descr->address;
315 gdt_addr = get_cpu_gdt_table(cpu);
/* Xen requires descriptor-table pages to be read-only unless the
 * writable_descriptor_tables feature is present. */
317 make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
/* Per-CPU identity: copy boot data, then fix the apicid mappings. */
319 cpu_data(cpu) = boot_cpu_data;
320 cpu_data(cpu).apicid = cpu;
322 cpu_2_logical_apicid[cpu] = cpu;
323 per_cpu(x86_cpu_to_apicid, cpu) = cpu;
/* Point this CPU's PDA at its idle task (x86_64-style bookkeeping). */
326 cpu_pda(cpu)->pcurrent = idle;
327 cpu_pda(cpu)->cpunumber = cpu;
328 clear_ti_thread_flag(task_thread_info(idle), TIF_FORK);
330 per_cpu(current_task, cpu) = idle;
/* With hotplug, only dom0 marks CPUs present here; without it, all
 * possible CPUs are marked present. */
335 #ifdef CONFIG_HOTPLUG_CPU
336 if (is_initial_xendomain())
337 cpu_set(cpu, cpu_present_map);
339 cpu_set(cpu, cpu_present_map);
343 init_xenbus_allowed_cpumask();
345 #ifdef CONFIG_X86_IO_APIC
347 * Here we can be sure that there is an IO-APIC in the system. Let's
350 if (!skip_ioapic_setup && nr_ioapics)
/*
 * Early boot-CPU preparation: set up the boot CPU's GDT and then ask
 * the hypervisor which vcpus exist (prefill_possible_map()).
 */
355 void __init smp_prepare_boot_cpu(void)
358 init_gdt(smp_processor_id());
361 prefill_possible_map();
364 #ifdef CONFIG_HOTPLUG_CPU
/* (comment below retains its original text; the surrounding comment
 * delimiters are missing from this extraction) */
367 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
368 * But do it early enough to catch critical for_each_present_cpu() loops
369 * in i386-specific code.
371 static int __init initialize_cpu_present_map(void)
/* Every possible (Xen-reported) CPU is considered present. */
373 cpu_present_map = cpu_possible_map;
376 core_initcall(initialize_cpu_present_map);
/*
 * Hotplug: take the current CPU offline -- drop its topology info and
 * clear it from cpu_online_map.  (Locals, checks and the return value,
 * i.e. original lines 382-385 and 387-394, are missing from this
 * extraction.)
 */
378 int __cpu_disable(void)
380 cpumask_t map = cpu_online_map;
381 unsigned int cpu = smp_processor_id();
386 remove_siblinginfo(cpu);
390 cpu_clear(cpu, cpu_online_map);
/*
 * Hotplug: wait (polling every HZ/10 ticks) until the hypervisor
 * reports the vcpu as down, then release its IPI/timer resources and,
 * if this was the last other CPU, switch back to UP alternatives.
 */
395 void __cpu_die(unsigned int cpu)
397 while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
398 current->state = TASK_UNINTERRUPTIBLE;
399 schedule_timeout(HZ/10);
402 xen_smp_intr_exit(cpu);
404 if (num_online_cpus() == 1)
405 alternatives_smp_switch(0);
408 #endif /* CONFIG_HOTPLUG_CPU */
/*
 * Bring @cpu online: verify it may be brought up, hand its context to
 * Xen (cpu_initialize_context()), switch to SMP alternatives on the
 * first secondary, bind its IPIs, mark it online, then kick the vcpu
 * with VCPUOP_up.  (Error checks between the visible statements are
 * missing from this extraction.)
 */
410 int __cpuinit __cpu_up(unsigned int cpu)
414 rc = cpu_up_check(cpu);
418 cpu_initialize_context(cpu);
420 if (num_online_cpus() == 1)
421 alternatives_smp_switch(1);
423 /* This must be done before setting cpu_online_map */
424 set_cpu_sibling_map(cpu);
427 rc = xen_smp_intr_init(cpu);
/* On IPI-binding failure, back out the sibling info. */
429 remove_siblinginfo(cpu);
433 cpu_set(cpu, cpu_online_map);
/* Finally ask the hypervisor to start the vcpu running. */
435 rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
/* Declaration line only in this extraction; the body (original lines
 * 442+) is missing. */
441 void __init smp_cpus_done(unsigned int max_cpus)
445 #ifndef CONFIG_X86_LOCAL_APIC
446 int setup_profiling_timer(unsigned int multiplier)