linux-flexiantxendom0-3.2.10.git: drivers/xen/core/smpboot.c
/*
 *      Xen SMP booting functions
 *
 *      See arch/i386/kernel/smpboot.c for copyright and credits for derived
 *      portions of this file.
 */

#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/irq.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <asm/desc.h>
#include <asm/pgalloc.h>
#include <xen/clock.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>

extern int local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

extern void hypervisor_callback(void);
extern void failsafe_callback(void);
extern void system_call(void);
extern void smp_trap_init(trap_info_t *);

cpumask_var_t vcpu_initialized_mask;

DEFINE_PER_CPU_READ_MOSTLY(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

static int __read_mostly ipi_irq = -1;

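/*
 * Populate cpu_possible_mask by probing the hypervisor: any VCPU ID for
 * which VCPUOP_is_up does not fail exists and may be brought up later.
 * VCPUs beyond NR_CPUS cannot be used and are only counted in total_cpus.
 */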
void __init prefill_possible_map(void)
{
        int i, rc;

        for_each_possible_cpu(i)
                if (i != smp_processor_id())
                        return;

        for (i = 0; i < NR_CPUS; i++) {
#ifndef CONFIG_HOTPLUG_CPU
                if (i >= setup_max_cpus)
                        break;
#endif
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0) {
                        set_cpu_possible(i, true);
                        nr_cpu_ids = i + 1;
                }
        }
        total_cpus = num_possible_cpus();
        for (; HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL) >= 0; ++i)
                if (i != smp_processor_id())
                        ++total_cpus;
}

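/*
 * All IPIs for a CPU arrive through a single event channel; the requested
 * vector is recorded in the per-CPU ipi_pending bitmap.  Drain that bitmap
 * and dispatch each pending vector to its handler, rechecking after the
 * event channel is cleared to close the race with concurrent senders.
 */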
static irqreturn_t ipi_interrupt(int irq, void *dev_id)
{
        static void (*const handlers[])(struct pt_regs *) = {
                [RESCHEDULE_VECTOR] = smp_reschedule_interrupt,
                [CALL_FUNCTION_VECTOR] = smp_call_function_interrupt,
                [CALL_FUNC_SINGLE_VECTOR] = smp_call_function_single_interrupt,
                [REBOOT_VECTOR] = smp_reboot_interrupt,
#ifdef CONFIG_IRQ_WORK
                [IRQ_WORK_VECTOR] = smp_irq_work_interrupt,
#endif
        };
        unsigned long *pending = __get_cpu_var(ipi_pending);
        struct pt_regs *regs = get_irq_regs();
        irqreturn_t ret = IRQ_NONE;

        for (;;) {
                unsigned int ipi = find_first_bit(pending, NR_IPIS);

                if (ipi >= NR_IPIS) {
                        clear_ipi_evtchn();
                        ipi = find_first_bit(pending, NR_IPIS);
                }
                if (ipi >= NR_IPIS)
                        return ret;
                ret = IRQ_HANDLED;
                do {
                        clear_bit(ipi, pending);
                        handlers[ipi](regs);
                        ipi = find_next_bit(pending, NR_IPIS, ipi);
                } while (ipi < NR_IPIS);
        }
}

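/*
 * Per-CPU interrupt setup: bind the shared "ipi" irqaction for this CPU
 * (every CPU must end up with the same IRQ number), initialise the pv
 * spinlock event channel and, for secondary CPUs, the local timer.
 * Undone by xen_smp_intr_exit() on failure or CPU offline.
 */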
static int __cpuinit xen_smp_intr_init(unsigned int cpu)
{
        static struct irqaction ipi_action = {
                .handler = ipi_interrupt,
                .flags   = IRQF_DISABLED,
                .name    = "ipi"
        };
        int rc;

        rc = bind_ipi_to_irqaction(cpu, &ipi_action);
        if (rc < 0)
                return rc;
        if (ipi_irq < 0)
                ipi_irq = rc;
        else
                BUG_ON(ipi_irq != rc);

        rc = xen_spinlock_init(cpu);
        if (rc < 0)
                goto unbind_ipi;

        if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
                goto fail;

        return 0;

 fail:
        xen_spinlock_cleanup(cpu);
 unbind_ipi:
        unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
        return rc;
}

static void __cpuinit xen_smp_intr_exit(unsigned int cpu)
{
        if (cpu != 0)
                local_teardown_timer(cpu);

        unbind_from_per_cpu_irq(ipi_irq, cpu, NULL);
        xen_spinlock_cleanup(cpu);
}

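/*
 * Runs on the just-started VCPU (entered via cpu_bringup_and_idle):
 * complete per-CPU initialisation, set up the clockevent device and
 * mark the CPU online before it drops into the idle loop.
 */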
static void __cpuinit cpu_bringup(void)
{
        unsigned int cpu;

        cpu_init();
        identify_secondary_cpu(__this_cpu_ptr(&cpu_info));
        touch_softlockup_watchdog();
        preempt_disable();
        xen_setup_cpu_clockevents();
        cpu = smp_processor_id();
        notify_cpu_starting(cpu);
        ipi_call_lock_irq();
        set_cpu_online(cpu, true);
        ipi_call_unlock_irq();
}

static void __cpuinit cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_idle();
}

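/*
 * Build the initial register and descriptor state for a secondary VCPU
 * and hand it to the hypervisor via VCPUOP_initialise.  The VCPU will
 * start executing at cpu_bringup_and_idle on the idle task's stack.
 */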
static void __cpuinit cpu_initialize_context(unsigned int cpu)
{
        /* vcpu_guest_context_t is too large to allocate on the stack.
         * Hence we allocate statically and protect it with a lock */
        static vcpu_guest_context_t ctxt;
        static DEFINE_SPINLOCK(ctxt_lock);

        struct task_struct *idle = idle_task(cpu);

        if (cpumask_test_and_set_cpu(cpu, vcpu_initialized_mask))
                return;

        spin_lock(&ctxt_lock);

        memset(&ctxt, 0, sizeof(ctxt));

        ctxt.flags = VGCF_IN_KERNEL;
        ctxt.user_regs.ds = __USER_DS;
        ctxt.user_regs.es = __USER_DS;
        ctxt.user_regs.ss = __KERNEL_DS;
        ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */

        smp_trap_init(ctxt.trap_ctxt);

        ctxt.gdt_frames[0] = arbitrary_virt_to_mfn(get_cpu_gdt_table(cpu));
        ctxt.gdt_ents = GDT_SIZE / 8;

        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs);

        ctxt.kernel_ss = __KERNEL_DS;
        ctxt.kernel_sp = idle->thread.sp0;

        ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
#ifdef __i386__
        ctxt.event_callback_cs     = __KERNEL_CS;
        ctxt.failsafe_callback_cs  = __KERNEL_CS;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));

        ctxt.user_regs.fs = __KERNEL_PERCPU;
        ctxt.user_regs.gs = __KERNEL_STACK_CANARY;
#else /* __x86_64__ */
        ctxt.syscall_callback_eip  = (unsigned long)system_call;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

        ctxt.gs_base_kernel = per_cpu_offset(cpu);
#endif

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
                BUG();

        spin_unlock(&ctxt_lock);
}

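/*
 * Prepare secondary CPUs: set up the boot CPU's IPI channel, restrict the
 * possible map to max_cpus, then for every other possible CPU fork an idle
 * task, make its GDT page read-only for Xen and, where appropriate, mark
 * it present.
 */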
void __init smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned int cpu;
        struct task_struct *idle;
        int apicid;
        struct vcpu_get_physid cpu_id;
        void *gdt_addr;

        apicid = 0;
        if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, 0, &cpu_id) == 0)
                apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
        cpu_data(0) = boot_cpu_data;
        current_thread_info()->cpu = 0;

        if (xen_smp_intr_init(0))
                BUG();

        if (!alloc_cpumask_var(&vcpu_initialized_mask, GFP_KERNEL))
                BUG();
        cpumask_copy(vcpu_initialized_mask, cpumask_of(0));

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = nr_cpu_ids - 1; !cpu_possible(cpu); cpu--)
                        continue;
                set_cpu_possible(cpu, false);
        }

        for_each_possible_cpu (cpu) {
                if (cpu == 0)
                        continue;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

                gdt_addr = get_cpu_gdt_table(cpu);
                make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);

                apicid = cpu;
                if (HYPERVISOR_vcpu_op(VCPUOP_get_physid, cpu, &cpu_id) == 0)
                        apicid = xen_vcpu_physid_to_x86_apicid(cpu_id.phys_id);
                cpu_data(cpu) = boot_cpu_data;
                cpu_data(cpu).cpu_index = cpu;

#ifdef __x86_64__
                clear_tsk_thread_flag(idle, TIF_FORK);
                per_cpu(kernel_stack, cpu) =
                        (unsigned long)task_stack_page(idle) -
                        KERNEL_STACK_OFFSET + THREAD_SIZE;
#endif
                per_cpu(current_task, cpu) = idle;

                irq_ctx_init(cpu);

#ifdef CONFIG_HOTPLUG_CPU
                if (is_initial_xendomain())
#endif
                        set_cpu_present(cpu, true);
        }

        init_xenbus_allowed_cpumask();

#ifdef CONFIG_X86_IO_APIC
        /*
         * Here we can be sure that there is an IO-APIC in the system. Let's
         * go and set it up:
         */
        if (cpu_has_apic && !skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
#endif
}

void __init smp_prepare_boot_cpu(void)
{
        unsigned int cpu;

        switch_to_new_gdt(smp_processor_id());
        prefill_possible_map();
        for_each_possible_cpu(cpu)
                if (cpu != smp_processor_id())
                        setup_vcpu_info(cpu);
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
 * But do it early enough to catch critical for_each_present_cpu() loops
 * in i386-specific code.
 */
static int __init initialize_cpu_present_map(void)
{
        unsigned int cpu;

        for_each_possible_cpu(cpu)
                set_cpu_present(cpu, true);

        return 0;
}
core_initcall(initialize_cpu_present_map);

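/* Take the calling CPU out of cpu_online_mask; CPU 0 cannot be offlined. */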
int __cpuinit __cpu_disable(void)
{
        unsigned int cpu = smp_processor_id();

        if (cpu == 0)
                return -EBUSY;

        set_cpu_online(cpu, false);
        fixup_irqs();

        return 0;
}

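/*
 * Poll VCPUOP_is_up until the dying CPU's VCPU has actually gone down,
 * then release its per-CPU interrupts.
 */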
void __cpuinit __cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                current->state = TASK_UNINTERRUPTIBLE;
                schedule_timeout(HZ/10);
        }

        xen_smp_intr_exit(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(0);
}

#endif /* CONFIG_HOTPLUG_CPU */

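/*
 * Bring a secondary CPU online: set up its interrupts and VCPU context,
 * ask the hypervisor to start it, then wait up to five seconds for it to
 * mark itself online; tear everything down again on timeout or error.
 */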
int __cpuinit __cpu_up(unsigned int cpu)
{
        int rc;

        rc = cpu_up_check(cpu);
        if (rc)
                return rc;

        rc = xen_smp_intr_init(cpu);
        if (rc)
                return rc;

        cpu_initialize_context(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        /* This must be done before setting cpu_online_map */
        wmb();

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        if (!rc) {
                /* Wait 5s total for a response. */
                unsigned long timeout = jiffies + 5 * HZ;

                while (!cpu_online(cpu) && time_before_eq(jiffies, timeout))
                        HYPERVISOR_yield();
                if (!cpu_online(cpu)) {
                        VOID(HYPERVISOR_vcpu_op(VCPUOP_down, cpu, NULL));
                        rc = -ETIMEDOUT;
                }
        }

        if (rc) {
                xen_smp_intr_exit(cpu);
                if (num_online_cpus() == 1)
                        alternatives_smp_switch(0);
        }

        return rc;
}

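/*
 * Idle-loop hook for an offlined CPU: tell the hypervisor to take this
 * VCPU down and, when it is later brought back up, re-run the bringup
 * path from the point after the hypercall.
 */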
void __ref play_dead(void)
{
#ifdef CONFIG_HOTPLUG_CPU
        idle_task_exit();
        local_irq_disable();
        cpumask_clear_cpu(smp_processor_id(), cpu_initialized_mask);
        preempt_enable_no_resched();
        VOID(HYPERVISOR_vcpu_op(VCPUOP_down, smp_processor_id(), NULL));
        cpu_bringup();
#else
        BUG();
#endif
}

void __init smp_cpus_done(unsigned int max_cpus)
{
        nmi_selftest();
}

#ifndef CONFIG_X86_LOCAL_APIC
int setup_profiling_timer(unsigned int multiplier)
{
        return -EINVAL;
}
#endif