drivers/xen/core/smpboot.c
/*
 *      Xen SMP booting functions
 *
 *      See arch/i386/kernel/smpboot.c for copyright and credits for derived
 *      portions of this file.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/sched.h>
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
#include <linux/bootmem.h>
#include <linux/notifier.h>
#include <linux/cpu.h>
#include <linux/percpu.h>
#include <asm/desc.h>
#include <asm/arch_hooks.h>
#include <asm/pgalloc.h>
#include <xen/evtchn.h>
#include <xen/interface/vcpu.h>
#include <xen/cpu_hotplug.h>
#include <xen/xenbus.h>

extern irqreturn_t smp_reschedule_interrupt(int, void *);
extern irqreturn_t smp_call_function_interrupt(int, void *);

extern int local_setup_timer(unsigned int cpu);
extern void local_teardown_timer(unsigned int cpu);

extern void hypervisor_callback(void);
extern void failsafe_callback(void);
extern void system_call(void);
extern void smp_trap_init(trap_info_t *);

/* Number of siblings per CPU package */
int smp_num_siblings = 1;

cpumask_t cpu_online_map;
EXPORT_SYMBOL(cpu_online_map);
cpumask_t cpu_possible_map;
EXPORT_SYMBOL(cpu_possible_map);
cpumask_t cpu_initialized_map;

DEFINE_PER_CPU(struct cpuinfo_x86, cpu_info);
EXPORT_PER_CPU_SYMBOL(cpu_info);

#ifdef CONFIG_HOTPLUG_CPU
DEFINE_PER_CPU(int, cpu_state) = { 0 };
#endif

static DEFINE_PER_CPU(int, resched_irq);
static DEFINE_PER_CPU(int, callfunc_irq);
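/* The IRQ core keeps a pointer to the handler name, so the strings
 * must outlive the binding; hence static storage rather than stack. */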
static char resched_name[NR_CPUS][15];
static char callfunc_name[NR_CPUS][15];

u8 cpu_2_logical_apicid[NR_CPUS] = { [0 ... NR_CPUS-1] = BAD_APICID };

DEFINE_PER_CPU(cpumask_t, cpu_sibling_map);
DEFINE_PER_CPU(cpumask_t, cpu_core_map);
EXPORT_PER_CPU_SYMBOL(cpu_core_map);

#if defined(__i386__)
DEFINE_PER_CPU(u8, x86_cpu_to_apicid) = BAD_APICID;
EXPORT_PER_CPU_SYMBOL(x86_cpu_to_apicid);
#endif

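/*
 * Mark as possible every VCPU the hypervisor knows about: VCPUOP_is_up
 * only succeeds for VCPU ids that exist in this domain.  If the map
 * already holds more than the boot CPU it was prefilled elsewhere, so
 * leave it untouched.
 */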
void __init prefill_possible_map(void)
{
        int i, rc;

        for_each_possible_cpu(i)
                if (i != smp_processor_id())
                        return;

        for (i = 0; i < NR_CPUS; i++) {
                rc = HYPERVISOR_vcpu_op(VCPUOP_is_up, i, NULL);
                if (rc >= 0)
                        cpu_set(i, cpu_possible_map);
        }
}

void __init smp_alloc_memory(void)
{
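        /* Nothing to do: Xen starts VCPUs via hypercall, so no boot
         * trampoline memory is needed for secondary CPUs. */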
}

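/*
 * Xen exposes each VCPU as a single-core, single-thread package, so a
 * CPU's sibling and core maps contain only itself.
 */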
static inline void
set_cpu_sibling_map(unsigned int cpu)
{
        cpu_data(cpu).phys_proc_id = cpu;
        cpu_data(cpu).cpu_core_id  = 0;

        per_cpu(cpu_sibling_map, cpu) = cpumask_of_cpu(cpu);
        per_cpu(cpu_core_map, cpu) = cpumask_of_cpu(cpu);

        cpu_data(cpu).booted_cores = 1;
}

static void
remove_siblinginfo(unsigned int cpu)
{
        cpu_data(cpu).phys_proc_id = BAD_APICID;
        cpu_data(cpu).cpu_core_id  = BAD_APICID;

        cpus_clear(per_cpu(cpu_sibling_map, cpu));
        cpus_clear(per_cpu(cpu_core_map, cpu));

        cpu_data(cpu).booted_cores = 0;
}

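/*
 * Bind the reschedule and call-function IPIs to event-channel IRQs for
 * this CPU, and set up the per-CPU timer on secondary CPUs.  On failure
 * any handler already bound is unwound before returning the error.
 */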
static int __cpuinit xen_smp_intr_init(unsigned int cpu)
{
        int rc;

        per_cpu(resched_irq, cpu) = per_cpu(callfunc_irq, cpu) = -1;

        sprintf(resched_name[cpu], "resched%u", cpu);
        rc = bind_ipi_to_irqhandler(RESCHEDULE_VECTOR,
                                    cpu,
                                    smp_reschedule_interrupt,
                                    IRQF_DISABLED,
                                    resched_name[cpu],
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(resched_irq, cpu) = rc;

        sprintf(callfunc_name[cpu], "callfunc%u", cpu);
        rc = bind_ipi_to_irqhandler(CALL_FUNCTION_VECTOR,
                                    cpu,
                                    smp_call_function_interrupt,
                                    IRQF_DISABLED,
                                    callfunc_name[cpu],
                                    NULL);
        if (rc < 0)
                goto fail;
        per_cpu(callfunc_irq, cpu) = rc;

        if ((cpu != 0) && ((rc = local_setup_timer(cpu)) != 0))
                goto fail;

        return 0;

 fail:
        if (per_cpu(resched_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        if (per_cpu(callfunc_irq, cpu) >= 0)
                unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
        return rc;
}

#ifdef CONFIG_HOTPLUG_CPU
static void xen_smp_intr_exit(unsigned int cpu)
{
        if (cpu != 0)
                local_teardown_timer(cpu);

        unbind_from_irqhandler(per_cpu(resched_irq, cpu), NULL);
        unbind_from_irqhandler(per_cpu(callfunc_irq, cpu), NULL);
}
#endif

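/*
 * Runs on the newly booted CPU itself: finish per-CPU init, identify
 * the (virtual) processor, and enable interrupts before idling.
 */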
void __cpuinit cpu_bringup(void)
{
        cpu_init();
#ifdef __i386__
        identify_secondary_cpu(&current_cpu_data);
#else
        identify_cpu(&current_cpu_data);
#endif
        touch_softlockup_watchdog();
        preempt_disable();
        local_irq_enable();
}

static void __cpuinit cpu_bringup_and_idle(void)
{
        cpu_bringup();
        cpu_idle();
}

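/*
 * Build the initial register and callback state for a secondary VCPU
 * and register it with the hypervisor via VCPUOP_initialise.  The new
 * VCPU will begin executing in cpu_bringup_and_idle().
 */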
static void __cpuinit cpu_initialize_context(unsigned int cpu)
{
        /* vcpu_guest_context_t is too large to allocate on the stack.
         * Hence we allocate it statically and protect it with a lock. */
        static vcpu_guest_context_t ctxt;
        static DEFINE_SPINLOCK(ctxt_lock);

        struct task_struct *idle = idle_task(cpu);

        if (cpu_test_and_set(cpu, cpu_initialized_map))
                return;

        spin_lock(&ctxt_lock);

        memset(&ctxt, 0, sizeof(ctxt));

        ctxt.flags = VGCF_IN_KERNEL;
        ctxt.user_regs.ds = __USER_DS;
        ctxt.user_regs.es = __USER_DS;
        ctxt.user_regs.fs = 0;
        ctxt.user_regs.gs = 0;
        ctxt.user_regs.ss = __KERNEL_DS;
        ctxt.user_regs.eip = (unsigned long)cpu_bringup_and_idle;
        ctxt.user_regs.eflags = X86_EFLAGS_IF | 0x1000; /* IOPL_RING1 */

        memset(&ctxt.fpu_ctxt, 0, sizeof(ctxt.fpu_ctxt));

        smp_trap_init(ctxt.trap_ctxt);

        ctxt.ldt_ents = 0;
        ctxt.gdt_ents = GDT_SIZE / 8;

#ifdef __i386__
        ctxt.gdt_frames[0] = virt_to_mfn(get_cpu_gdt_table(cpu));

        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.esp0 - sizeof(struct pt_regs);

        ctxt.kernel_ss = __KERNEL_DS;
        ctxt.kernel_sp = idle->thread.esp0;

        ctxt.user_regs.fs = __KERNEL_PERCPU;

        ctxt.event_callback_cs     = __KERNEL_CS;
        ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_cs  = __KERNEL_CS;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(swapper_pg_dir));
#else /* __x86_64__ */
        ctxt.gdt_frames[0] = virt_to_mfn(cpu_gdt_descr[cpu].address);

        ctxt.user_regs.cs = __KERNEL_CS;
        ctxt.user_regs.esp = idle->thread.rsp0 - sizeof(struct pt_regs);

        ctxt.kernel_ss = __KERNEL_DS;
        ctxt.kernel_sp = idle->thread.rsp0;

        ctxt.event_callback_eip    = (unsigned long)hypervisor_callback;
        ctxt.failsafe_callback_eip = (unsigned long)failsafe_callback;
        ctxt.syscall_callback_eip  = (unsigned long)system_call;

        ctxt.ctrlreg[3] = xen_pfn_to_cr3(virt_to_mfn(init_level4_pgt));

        ctxt.gs_base_kernel = (unsigned long)(cpu_pda(cpu));
#endif

        if (HYPERVISOR_vcpu_op(VCPUOP_initialise, cpu, &ctxt))
                BUG();

        spin_unlock(&ctxt_lock);
}

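/*
 * Prepare every possible secondary CPU: set up boot-CPU state, trim
 * cpu_possible_map down to max_cpus, then give each remaining CPU an
 * idle task and its own (read-only, as Xen requires) GDT page.
 */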
void __init smp_prepare_cpus(unsigned int max_cpus)
{
        unsigned int cpu;
        struct task_struct *idle;
#ifdef __x86_64__
        struct desc_ptr *gdt_descr;
#endif
        void *gdt_addr;

        boot_cpu_data.apicid = 0;
        cpu_data(0) = boot_cpu_data;

        cpu_2_logical_apicid[0] = 0;
        per_cpu(x86_cpu_to_apicid, 0) = 0;

        current_thread_info()->cpu = 0;

        for (cpu = 0; cpu < NR_CPUS; cpu++) {
                cpus_clear(per_cpu(cpu_sibling_map, cpu));
                cpus_clear(per_cpu(cpu_core_map, cpu));
        }

        set_cpu_sibling_map(0);

        if (xen_smp_intr_init(0))
                BUG();

        cpu_initialized_map = cpumask_of_cpu(0);

        /* Restrict the possible_map according to max_cpus. */
        while ((num_possible_cpus() > 1) && (num_possible_cpus() > max_cpus)) {
                for (cpu = NR_CPUS-1; !cpu_isset(cpu, cpu_possible_map); cpu--)
                        continue;
                cpu_clear(cpu, cpu_possible_map);
        }

        for_each_possible_cpu (cpu) {
                if (cpu == 0)
                        continue;

                idle = fork_idle(cpu);
                if (IS_ERR(idle))
                        panic("failed fork for CPU %d", cpu);

#ifdef __x86_64__
                gdt_descr = &cpu_gdt_descr[cpu];
                gdt_descr->address = get_zeroed_page(GFP_KERNEL);
                if (unlikely(!gdt_descr->address)) {
                        printk(KERN_CRIT "CPU%d failed to allocate GDT\n",
                               cpu);
                        continue;
                }
                gdt_descr->size = GDT_SIZE;
                memcpy((void *)gdt_descr->address, cpu_gdt_table, GDT_SIZE);
                gdt_addr = (void *)gdt_descr->address;
#else
                init_gdt(cpu);
                gdt_addr = get_cpu_gdt_table(cpu);
#endif
                make_page_readonly(gdt_addr, XENFEAT_writable_descriptor_tables);

                cpu_data(cpu) = boot_cpu_data;
                cpu_data(cpu).apicid = cpu;

                cpu_2_logical_apicid[cpu] = cpu;
                per_cpu(x86_cpu_to_apicid, cpu) = cpu;

#ifdef __x86_64__
                cpu_pda(cpu)->pcurrent = idle;
                cpu_pda(cpu)->cpunumber = cpu;
                clear_ti_thread_flag(task_thread_info(idle), TIF_FORK);
#else
                per_cpu(current_task, cpu) = idle;
#endif

                irq_ctx_init(cpu);

#ifdef CONFIG_HOTPLUG_CPU
                if (is_initial_xendomain())
                        cpu_set(cpu, cpu_present_map);
#else
                cpu_set(cpu, cpu_present_map);
#endif
        }

        init_xenbus_allowed_cpumask();

#ifdef CONFIG_X86_IO_APIC
        /*
         * Set up the IO-APIC now, provided one is present and hasn't
         * been disabled on the command line.
         */
        if (!skip_ioapic_setup && nr_ioapics)
                setup_IO_APIC();
#endif
}

void __init smp_prepare_boot_cpu(void)
{
#ifdef __i386__
        init_gdt(smp_processor_id());
        switch_to_new_gdt();
#endif
        prefill_possible_map();
}

#ifdef CONFIG_HOTPLUG_CPU

/*
 * Initialize cpu_present_map late to skip SMP boot code in init/main.c.
 * But do it early enough to catch critical for_each_present_cpu() loops
 * in i386-specific code.
 */
static int __init initialize_cpu_present_map(void)
{
        cpu_present_map = cpu_possible_map;
        return 0;
}
core_initcall(initialize_cpu_present_map);

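/*
 * Take the calling CPU offline: drop its sibling info, reroute its
 * IRQs to the remaining online CPUs, and clear it from cpu_online_map.
 * Unplugging CPU 0 is refused with -EBUSY.
 */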
int __cpu_disable(void)
{
        cpumask_t map = cpu_online_map;
        unsigned int cpu = smp_processor_id();

        if (cpu == 0)
                return -EBUSY;

        remove_siblinginfo(cpu);

        cpu_clear(cpu, map);
        fixup_irqs(map);
        cpu_clear(cpu, cpu_online_map);

        return 0;
}

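/*
 * Runs on the CPU that initiated the unplug: poll until the hypervisor
 * reports the VCPU as down, then release its IPI bindings and timer.
 */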
void __cpu_die(unsigned int cpu)
{
        while (HYPERVISOR_vcpu_op(VCPUOP_is_up, cpu, NULL)) {
                set_current_state(TASK_UNINTERRUPTIBLE);
                schedule_timeout(HZ/10);
        }

        xen_smp_intr_exit(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(0);
}

#endif /* CONFIG_HOTPLUG_CPU */

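/*
 * Bring a secondary CPU online: hand its initial context to Xen, bind
 * its IPIs, publish it in cpu_online_map, and finally start the VCPU
 * with VCPUOP_up.
 */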
int __cpuinit __cpu_up(unsigned int cpu)
{
        int rc;

        rc = cpu_up_check(cpu);
        if (rc)
                return rc;

        cpu_initialize_context(cpu);

        if (num_online_cpus() == 1)
                alternatives_smp_switch(1);

        /* This must be done before setting cpu_online_map */
        set_cpu_sibling_map(cpu);
        wmb();

        rc = xen_smp_intr_init(cpu);
        if (rc) {
                remove_siblinginfo(cpu);
                return rc;
        }

        cpu_set(cpu, cpu_online_map);

        rc = HYPERVISOR_vcpu_op(VCPUOP_up, cpu, NULL);
        BUG_ON(rc);

        return 0;
}

void __init smp_cpus_done(unsigned int max_cpus)
{
}

#ifndef CONFIG_X86_LOCAL_APIC
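/* Without a local APIC there is no per-CPU profiling interrupt whose
 * rate could be changed, so reject any multiplier. */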
int setup_profiling_timer(unsigned int multiplier)
{
        return -EINVAL;
}
#endif