 * Xen SMP booting functions
 *
 * See arch/i386/kernel/smpboot.c for copyright and credits for derived
 * portions of this file.
8 #include <linux/init.h>
9 #include <linux/kernel.h>
11 #include <linux/sched.h>
12 #include <linux/kernel_stat.h>
13 #include <linux/irq.h>
14 #include <linux/notifier.h>
15 #include <linux/cpu.h>
16 #include <linux/percpu.h>
18 #include <asm/pgalloc.h>
19 #include <xen/clock.h>
20 #include <xen/evtchn.h>
21 #include <xen/interface/vcpu.h>
22 #include <xen/cpu_hotplug.h>
23 #include <xen/xenbus.h>
/* Per-CPU timer setup/teardown, implemented in the Xen time code
 * (not visible in this chunk). */
25 extern int local_setup_timer(unsigned int cpu);
26 extern void local_teardown_timer(unsigned int cpu);
/* Hypervisor entry points registered in the new VCPU's context below
 * (cpu_initialize_context()); presumably defined in assembly elsewhere. */
28 extern void hypervisor_callback(void);
29 extern void failsafe_callback(void);
30 extern void system_call(void);
31 extern void smp_trap_init(trap_info_t *);
/* Bitmap of VCPUs whose initial context has already been handed to Xen
 * via VCPUOP_initialise — tested-and-set in cpu_initialize_context(),
 * cleared again in play_dead(). */
33 cpumask_var_t vcpu_initialized_mask;
/* Per-CPU CPU identification data; exported for use by other modules. */
35 DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
36 EXPORT_PER_CPU_SYMBOL(cpu_info);
/* IRQ bound for inter-processor interrupts; -1 until xen_smp_intr_init()
 * binds it (BUG_ON there checks every CPU gets the same IRQ number). */
38 static int __read_mostly ipi_irq = -1;
/*
 * Populate cpu_possible_mask by asking the hypervisor which VCPUs exist.
 *
 * NOTE(review): several original lines (declarations, braces, loop bodies)
 * are missing from this chunk; comments describe only what is visible.
 * Visible logic: probe each CPU index with VCPUOP_is_up and mark it
 * possible, then derive total_cpus from the resulting mask.
 */
40 void __init prefill_possible_map(void)
/* First pass over already-possible CPUs, skipping the boot CPU
 * (body not visible here). */
44 for_each_possible_cpu(i)
45 if (i != smp_processor_id())
48 for (i = 0; i < NR_CPUS; i++) {
49 #ifndef CONFIG_HOTPLUG_CPU
/* Without CPU hotplug, never mark CPUs beyond setup_max_cpus possible. */
50 if (i >= setup_max_cpus)
/* VCPUOP_is_up succeeds (>= 0) for any VCPU the hypervisor knows about,
 * whether currently up or down. */
53 rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
55 set_cpu_possible(i, true);
59 total_cpus = num_possible_cpus();
/* Continue counting VCPUs past the possible range — presumably to report
 * hypervisor-known CPUs not usable by this kernel; verify against the
 * missing loop body. */
60 for (; HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) >= 0; ++i)
61 if (i != smp_processor_id())
/*
 * IPI demultiplexer: all IPI types arrive on one Xen event channel per
 * CPU; the sender records the vector in the per-CPU ipi_pending bitmap
 * and this handler dispatches each set bit to the corresponding native
 * SMP handler from the table below.
 *
 * NOTE(review): interior lines are missing from this chunk (loop
 * structure, return path); comments cover only visible code.
 */
65 static irqreturn_t ipi_interrupt(int irq, void *dev_id)
/* Vector-indexed dispatch table mirroring the native x86 IPI vectors. */
67 static void (*const handlers[])(struct pt_regs *) = {
68 [RESCHEDULE_VECTOR] = smp_reschedule_interrupt,
69 [CALL_FUNCTION_VECTOR] = smp_call_function_interrupt,
70 [CALL_FUNC_SINGLE_VECTOR] = smp_call_function_single_interrupt,
71 [REBOOT_VECTOR] = smp_reboot_interrupt,
72 #ifdef CONFIG_IRQ_WORK
73 [IRQ_WORK_VECTOR] = smp_irq_work_interrupt,
/* Bitmap of IPI vectors queued for this CPU by remote senders. */
76 unsigned long *pending = __get_cpu_var(ipi_pending);
77 struct pt_regs *regs = get_irq_regs();
78 irqreturn_t ret = IRQ_NONE;
81 unsigned int ipi = find_first_bit(pending, NR_IPIS);
/* Re-scan after handling in case a new IPI arrived meanwhile. */
85 ipi = find_first_bit(pending, NR_IPIS);
/* clear_bit is atomic — senders may set bits concurrently. */
91 clear_bit(ipi, pending);
93 ipi = find_next_bit(pending, NR_IPIS, ipi);
94 } while (ipi < NR_IPIS);
/*
 * Set up per-CPU interrupt plumbing for @cpu: bind the shared IPI
 * irqaction, initialize the Xen spinlock event channel, and (for
 * secondary CPUs) the local timer.  Unwinds in reverse on failure.
 *
 * NOTE(review): error-path labels/braces are missing from this chunk.
 */
98 static int __cpuinit xen_smp_intr_init(unsigned int cpu)
/* One irqaction shared by every CPU; bound per-CPU below. */
100 static struct irqaction ipi_action = {
101 .handler = ipi_interrupt,
102 .flags = IRQF_DISABLED,
107 rc = bind_ipi_to_irqaction(cpu, &ipi_action);
/* Every CPU must be assigned the same IPI IRQ number. */
113 BUG_ON(ipi_irq != rc);
115 rc = xen_spinlock_init(cpu);
/* The boot CPU's timer is set up elsewhere; only secondaries need
 * local_setup_timer() here. */
119 if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
/* Error unwinding (labels not visible in this chunk): */
125 xen_spinlock_cleanup(cpu);
127 unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
/*
 * Tear down what xen_smp_intr_init() set up for @cpu, in reverse order:
 * local timer, IPI binding, then spinlock event channel.
 */
131 static void __cpuinit xen_smp_intr_exit(unsigned int cpu)
134 local_teardown_timer(cpu);
136 unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
137 xen_spinlock_cleanup(cpu);
/*
 * Late bring-up running on the freshly started secondary CPU itself:
 * identify the CPU, start its clockevents, and mark it online so the
 * waiting loop in __cpu_up() can proceed.
 *
 * NOTE(review): some interior lines are missing from this chunk.
 */
140 static void __cpuinit cpu_bringup(void)
145 identify_secondary_cpu(__this_cpu_ptr(&cpu_info));
/* Bring-up can take a while; keep the softlockup detector quiet. */
146 touch_softlockup_watchdog();
148 xen_setup_cpu_clockevents();
149 cpu = smp_processor_id();
150 notify_cpu_starting(cpu);
/* __cpu_up() polls cpu_online(cpu); this releases it. */
152 set_cpu_online(cpu, true);
153 ipi_call_unlock_irq();
/*
 * First C code executed by a new VCPU — installed as user_regs.eip in
 * cpu_initialize_context().  Presumably calls cpu_bringup() and then
 * enters the idle loop; the body is not visible in this chunk.
 */
156 static void __cpuinit cpu_bringup_and_idle(void)
/*
 * Build the initial register/segment context for VCPU @cpu and hand it
 * to the hypervisor with VCPUOP_initialise.  Idempotent: a CPU already
 * in vcpu_initialized_mask is skipped (relevant for CPU hotplug
 * re-onlining).
 *
 * NOTE(review): some lines (error handling, #ifdef openers) are missing
 * from this chunk.
 */
162 static void __cpuinit cpu_initialize_context(unsigned int cpu)
164 /* vcpu_guest_context_t is too large to allocate on the stack.
165 * Hence we allocate statically and protect it with a lock */
166 static vcpu_guest_context_t ctxt;
167 static DEFINE_SPINLOCK(ctxt_lock);
169 struct task_struct *idle = idle_task(cpu);
/* Already initialised once — nothing to do. */
171 if (cpumask_test_and_set_cpu(cpu, vcpu_initialized_mask))
174 spin_lock(&ctxt_lock);
176 memset(&ctxt, 0, sizeof(ctxt));
/* The new VCPU starts executing in kernel mode. */
178 ctxt.flags = VGCF_IN_KERNEL;
179 ctxt.user_regs.ds = __USER_DS;
180 ctxt.user_regs.es = __USER_DS;
181 ctxt.user_regs.ss = __KERNEL_DS;
/* Entry point for the new VCPU. */
182 ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
183 ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */
185 smp_trap_init(ctxt.trap_ctxt);
/* Xen needs the GDT as a machine frame number plus entry count. */
187 ctxt.gdt_frames[0] = arbitrary_virt_to_mfn(get_cpu_gdt_table(cpu));
188 ctxt.gdt_ents = GDT_SIZE / 8;
190 ctxt.user_regs.cs = __KERNEL_CS;
/* Start on the idle task's kernel stack, below a pt_regs frame. */
191 ctxt.user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);
193 ctxt.kernel_ss = __KERNEL_DS;
194 ctxt.kernel_sp = idle->thread.sp0;
/* Event (interrupt) and failsafe callbacks for this VCPU. */
196 ctxt.event_callback_eip = (unsigned long)hypervisor_callback;
197 ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
/* 32-bit-only fields below — the matching #ifdef is not visible in
 * this chunk (presumably #ifdef CONFIG_X86_32 or similar). */
199 ctxt.event_callback_cs = __KERNEL_CS;
200 ctxt.failsafe_callback_cs = __KERNEL_CS;
202 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
204 ctxt.user_regs.fs = __KERNEL_PERCPU;
205 ctxt.user_regs.gs = __KERNEL_STACK_CANARY;
206 #else /* __x86_64__ */
207 ctxt.syscall_callback_eip = (unsigned long)system_call;
209 ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));
/* Point GS base at this CPU's per-CPU area. */
211 ctxt.gs_base_kernel = per_cpu_offset(cpu);
214 if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
217 spin_unlock(&ctxt_lock);
/*
 * Prepare all possible CPUs for SMP boot: set up the boot CPU's
 * interrupt plumbing, trim cpu_possible_mask to @max_cpus, and for each
 * remaining CPU fork an idle task, make its GDT read-only for Xen, and
 * seed its per-CPU data.
 *
 * NOTE(review): many interior lines (declarations, braces, error paths)
 * are missing from this chunk; comments cover visible code only.
 */
220 void __init smp_prepare_cpus(unsigned int max_cpus)
223 struct task_struct *idle;
225 struct vcpu_get_physid cpu_id;
/* Map VCPU 0's physical identity to an x86 APIC id when the hypervisor
 * provides one (dom0 case, presumably). */
229 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
230 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
231 cpu_data(0) = boot_cpu_data;
232 current_thread_info()->cpu = 0;
234 if (xen_smp_intr_init(0))
237 if (!alloc_cpumask_var(&vcpu_initialized_mask, GFP_KERNEL))
/* The boot CPU's VCPU is already initialised by definition. */
239 cpumask_copy(vcpu_initialized_mask, cpumask_of(0))
241 /* Restrict the possible_map according to max_cpus. */
242 while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
/* Drop possible CPUs from the top down until within max_cpus. */
243 for (cpu = nr_cpu_ids-1; !cpu_possible(cpu); cpu--)
245 set_cpu_possible(cpu, false);
248 for_each_possible_cpu (cpu) {
252 idle = fork_idle(cpu);
254 panic("failed fork for CPU %d", cpu);
/* Xen requires guest descriptor tables to be mapped read-only. */
256 gdt_addr = get_cpu_gdt_table(cpu);
257 make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);
260 if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
261 apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
262 cpu_data(cpu) = boot_cpu_data;
263 cpu_data(cpu).cpu_index = cpu;
/* x86-64: the new task enters via cpu_bringup_and_idle, not the usual
 * fork return path — clear TIF_FORK and precompute kernel_stack. */
266 clear_tsk_thread_flag(idle, TIF_FORK);
267 per_cpu(kernel_stack, cpu) =
268 (unsigned long)task_stack_page(idle) -
269 KERNEL_STACK_OFFSET + THREAD_SIZE;
271 per_cpu(current_task, cpu) = idle;
275 #ifdef CONFIG_HOTPLUG_CPU
/* With hotplug, only dom0 marks CPUs present here; domU presence is
 * handled later by initialize_cpu_present_map(). */
276 if (is_initial_xendomain())
278 set_cpu_present(cpu, true);
281 init_xenbus_allowed_cpumask();
283 #ifdef CONFIG_X86_IO_APIC
285 * Here we can be sure that there is an IO-APIC in the system. Let's
288 if (cpu_has_apic && !skip_ioapic_setup && nr_ioapics)
/*
 * Early boot-CPU preparation: switch to the per-CPU GDT, compute the
 * possible map from the hypervisor, and set up vcpu_info placement for
 * all secondary VCPUs.
 */
293 void __init smp_prepare_boot_cpu(void)
297 switch_to_new_gdt(smp_processor_id());
298 prefill_possible_map();
/* The boot CPU's vcpu_info is presumably set up earlier; only the
 * secondaries need it here. */
299 for_each_possible_cpu(cpu)
300 if (cpu != smp_processor_id())
301 setup_vcpu_info(cpu);
304 #ifdef CONFIG_HOTPLUG_CPU
307 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
308 * But do it early enough to catch critical for_each_present_cpu() loops
309 * in i386-specific code.
/* Mark every possible CPU present (hotplug builds only; non-hotplug
 * builds set presence in smp_prepare_cpus()). */
311 static int __init initialize_cpu_present_map(void)
315 for_each_possible_cpu(cpu)
316 set_cpu_present(cpu, true);
320 core_initcall(initialize_cpu_present_map);
/*
 * Take the current CPU offline (CPU hotplug path).  Marks it
 * not-online; further teardown lines are missing from this chunk.
 */
322 int __cpuinit __cpu_disable(void)
324 unsigned int cpu = smp_processor_id();
329 set_cpu_online(cpu, false);
/*
 * Called on a surviving CPU to reap @cpu after __cpu_disable(): wait
 * until the hypervisor reports the VCPU down, tear down its interrupt
 * plumbing, and drop back to UP alternatives if it was the last peer.
 */
335 void __cpuinit __cpu_die(unsigned int cpu)
/* Poll VCPUOP_is_up; the dying CPU goes down via play_dead(). */
337 while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
338 current->state = TASK_UNINTERRUPTIBLE;
339 schedule_timeout(HZ/10);
342 xen_smp_intr_exit(cpu);
/* Back to uniprocessor: patch SMP alternatives to the UP variants. */
344 if (num_online_cpus() == 1)
345 alternatives_smp_switch(0);
348 #endif /* CONFIG_HOTPLUG_CPU */
/*
 * Bring @cpu online: set up its interrupts, send its initial context to
 * Xen, kick it with VCPUOP_up, then wait up to 5 seconds for it to mark
 * itself online (done in cpu_bringup()).  On timeout, the VCPU is taken
 * back down and the setup is unwound.
 *
 * NOTE(review): some braces/returns are missing from this chunk.
 */
350 int __cpuinit __cpu_up(unsigned int cpu)
354 rc = cpu_up_check(cpu);
358 rc = xen_smp_intr_init(cpu);
362 cpu_initialize_context(cpu);
/* First secondary: switch alternatives to the SMP variants. */
364 if (num_online_cpus() == 1)
365 alternatives_smp_switch(1);
367 /* This must be done before setting cpu_online_map */
370 rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
372 /* Wait 5s total for a response. */
373 unsigned long timeout = jiffies + 5 * HZ;
375 while (!cpu_online(cpu) && time_before_eq(jiffies, timeout))
377 if (!cpu_online(cpu)) {
/* Timed out — force the VCPU back down and unwind. */
378 VOID(HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL));
384 xen_smp_intr_exit(cpu);
385 if (num_online_cpus() == 1)
386 alternatives_smp_switch(0);
/*
 * Executed on the dying CPU itself: clear its "initialised" flag so a
 * later re-online rebuilds the VCPU context, then ask the hypervisor to
 * take this VCPU down.  __cpu_die() on a peer observes the transition.
 */
392 void __ref play_dead(void)
394 #ifdef CONFIG_HOTPLUG_CPU
/* NOTE(review): clears cpu_initialized_mask, not vcpu_initialized_mask —
 * likely the native x86 mask; verify against the full file whether this
 * should also (or instead) clear vcpu_initialized_mask. */
397 cpumask_clear_cpu(smp_processor_id(), cpu_initialized_mask);
398 preempt_enable_no_resched();
399 VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
/*
 * Finalize SMP boot after all CPUs are up; the body is not visible in
 * this chunk (likely empty or minimal under Xen).
 */
406 void __init smp_cpus_done(unsigned int max_cpus)
411 #ifndef CONFIG_X86_LOCAL_APIC
412 int setup_profiling_timer(unsigned int multiplier)