+- add patches.fixes/linux-post-2.6.3-20040220
[linux-flexiantxendom0-3.2.10.git] / kernel / sched.c
index b54c7ba..629ea85 100644 (file)
@@ -37,6 +37,7 @@
 #include <linux/rcupdate.h>
 #include <linux/cpu.h>
 #include <linux/percpu.h>
+#include <linux/kthread.h>
 
 #ifdef CONFIG_NUMA
 #define cpu_to_node_mask(cpu) node_to_cpumask(cpu_to_node(cpu))
@@ -2530,34 +2531,36 @@ static inline struct task_struct *younger_sibling(struct task_struct *p)
 
 static void show_task(task_t * p)
 {
-       unsigned long free = 0;
        task_t *relative;
-       int state;
-       static const char * stat_nam[] = { "R", "S", "D", "T", "Z", "W" };
+       unsigned state;
+       unsigned long free = 0;
+       static const char *stat_nam[] = { "R", "S", "D", "T", "Z", "W" };
 
        printk("%-13.13s ", p->comm);
        state = p->state ? __ffs(p->state) + 1 : 0;
-       if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
+       if (state < ARRAY_SIZE(stat_nam))
                printk(stat_nam[state]);
        else
-               printk(" ");
+               printk("?");
 #if (BITS_PER_LONG == 32)
-       if (p == current)
-               printk(" current  ");
+       if (state == TASK_RUNNING)
+               printk(" running ");
        else
                printk(" %08lX ", thread_saved_pc(p));
 #else
-       if (p == current)
-               printk("   current task   ");
+       if (state == TASK_RUNNING)
+               printk("  running task   ");
        else
                printk(" %016lx ", thread_saved_pc(p));
 #endif
+#ifdef CONFIG_DEBUG_STACK_USAGE
        {
                unsigned long * n = (unsigned long *) (p->thread_info+1);
                while (!*n)
                        n++;
                free = (unsigned long) n - (unsigned long)(p->thread_info+1);
        }
+#endif
        printk("%5lu %5d %6d ", free, p->pid, p->parent->pid);
        if ((relative = eldest_child(p)))
                printk("%5d ", relative->pid);
@@ -2576,7 +2579,8 @@ static void show_task(task_t * p)
        else
                printk(" (NOTLB)\n");
 
-       show_stack(p, NULL);
+       if (state != TASK_RUNNING)
+               show_stack(p, NULL);
 }
 
 void show_state(void)
@@ -2585,12 +2589,12 @@ void show_state(void)
 
 #if (BITS_PER_LONG == 32)
        printk("\n"
-              "                         free                        sibling\n");
-       printk("  task             PC    stack   pid father child younger older\n");
+              "                                               sibling\n");
+       printk("  task             PC      pid father child younger older\n");
 #else
        printk("\n"
-              "                                 free                        sibling\n");
-       printk("  task                 PC        stack   pid father child younger older\n");
+              "                                                       sibling\n");
+       printk("  task                 PC          pid father child younger older\n");
 #endif
        read_lock(&tasklist_lock);
        do_each_thread(g, p) {
@@ -2711,12 +2715,6 @@ out:
        local_irq_restore(flags);
 }
 
-typedef struct {
-       int cpu;
-       struct completion startup_done;
-       task_t *task;
-} migration_startup_t;
-
 /*
  * migration_thread - this is a highprio system thread that performs
  * thread migration by bumping thread off CPU then 'pushing' onto
@@ -2726,27 +2724,17 @@ static int migration_thread(void * data)
 {
        /* Marking "param" __user is ok, since we do a set_fs(KERNEL_DS); */
        struct sched_param __user param = { .sched_priority = MAX_RT_PRIO-1 };
-       migration_startup_t *startup = data;
-       int cpu = startup->cpu;
        runqueue_t *rq;
+       int cpu = (long)data;
        int ret;
 
-       startup->task = current;
-       complete(&startup->startup_done);
-       set_current_state(TASK_UNINTERRUPTIBLE);
-       schedule();
-
        BUG_ON(smp_processor_id() != cpu);
-
-       daemonize("migration/%d", cpu);
-       set_fs(KERNEL_DS);
-
        ret = setscheduler(0, SCHED_FIFO, &param);
 
        rq = this_rq();
-       rq->migration_thread = current;
+       BUG_ON(rq->migration_thread != current);
 
-       for (;;) {
+       while (!kthread_should_stop()) {
                struct list_head *head;
                migration_req_t *req;
 
@@ -2769,6 +2757,7 @@ static int migration_thread(void * data)
                               any_online_cpu(req->task->cpus_allowed));
                complete(&req->done);
        }
+       return 0;
 }
 
 /*
@@ -2778,47 +2767,43 @@ static int migration_thread(void * data)
 static int migration_call(struct notifier_block *nfb, unsigned long action,
                          void *hcpu)
 {
-       long cpu = (long)hcpu;
-       migration_startup_t startup;
+       int cpu = (long)hcpu;
+       struct task_struct *p;
 
        switch (action) {
+       case CPU_UP_PREPARE:
+               p = kthread_create(migration_thread, hcpu, "migration/%d",cpu);
+               if (IS_ERR(p))
+                       return NOTIFY_BAD;
+               kthread_bind(p, cpu);
+               cpu_rq(cpu)->migration_thread = p;
+               break;
        case CPU_ONLINE:
-
-               printk("Starting migration thread for cpu %li\n", cpu);
-
-               startup.cpu = cpu;
-               startup.task = NULL;
-               init_completion(&startup.startup_done);
-
-               kernel_thread(migration_thread, &startup, CLONE_KERNEL);
-               wait_for_completion(&startup.startup_done);
-               wait_task_inactive(startup.task);
-
-               startup.task->thread_info->cpu = cpu;
-               startup.task->cpus_allowed = cpumask_of_cpu(cpu);
-
-               wake_up_process(startup.task);
-
-               while (!cpu_rq(cpu)->migration_thread)
-                       yield();
-
+               /* Strictly unnecessary, as first user will wake it. */
+               wake_up_process(cpu_rq(cpu)->migration_thread);
                break;
        }
        return NOTIFY_OK;
 }
 
-static struct notifier_block migration_notifier
-                       = { .notifier_call = &migration_call };
+/*
+ * We want this after the other threads, so they can use set_cpus_allowed
+ * from their CPU_OFFLINE callback
+ */
+static struct notifier_block __devinitdata migration_notifier = {
+       .notifier_call = migration_call,
+       .priority = -10,
+};
 
-__init int migration_init(void)
+int __init migration_init(void)
 {
+       void *cpu = (void *)(long)smp_processor_id();
        /* Start one for boot CPU. */
-       migration_call(&migration_notifier, CPU_ONLINE,
-                      (void *)(long)smp_processor_id());
+       migration_call(&migration_notifier, CPU_UP_PREPARE, cpu);
+       migration_call(&migration_notifier, CPU_ONLINE, cpu);
        register_cpu_notifier(&migration_notifier);
        return 0;
 }
-
 #endif
 
 /*
@@ -2836,45 +2821,11 @@ __init int migration_init(void)
 spinlock_t kernel_flag __cacheline_aligned_in_smp = SPIN_LOCK_UNLOCKED;
 EXPORT_SYMBOL(kernel_flag);
 
-static void kstat_init_cpu(int cpu)
-{
-       /* Add any initialisation to kstat here */
-       /* Useful when cpu offlining logic is added.. */
-}
-
-static int __devinit kstat_cpu_notify(struct notifier_block *self,
-                                     unsigned long action, void *hcpu)
-{
-       int cpu = (unsigned long)hcpu;
-       switch(action) {
-       case CPU_UP_PREPARE:
-               kstat_init_cpu(cpu);
-               break;
-       default:
-               break;
-       }
-       return NOTIFY_OK;
-}
-
-static struct notifier_block __devinitdata kstat_nb = {
-       .notifier_call  = kstat_cpu_notify,
-       .next           = NULL,
-};
-
-__init static void init_kstat(void)
-{
-       kstat_cpu_notify(&kstat_nb, (unsigned long)CPU_UP_PREPARE,
-                        (void *)(long)smp_processor_id());
-       register_cpu_notifier(&kstat_nb);
-}
-
 void __init sched_init(void)
 {
        runqueue_t *rq;
        int i, j, k;
 
-       /* Init the kstat counters */
-       init_kstat();
        for (i = 0; i < NR_CPUS; i++) {
                prio_array_t *array;