Update to 3.4-final.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 503d642..9a7fe31 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -73,6 +73,7 @@
 #include <linux/init_task.h>
 #include <linux/binfmts.h>
 
+#include <asm/switch_to.h>
 #include <asm/tlb.h>
 #include <asm/irq_regs.h>
 #include <asm/mutex.h>
@@ -1264,29 +1265,59 @@ EXPORT_SYMBOL_GPL(kick_process);
  */
 static int select_fallback_rq(int cpu, struct task_struct *p)
 {
-       int dest_cpu;
        const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+       enum { cpuset, possible, fail } state = cpuset;
+       int dest_cpu;
 
        /* Look for allowed, online CPU in same node. */
-       for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+       for_each_cpu(dest_cpu, nodemask) {
+               if (!cpu_online(dest_cpu))
+                       continue;
+               if (!cpu_active(dest_cpu))
+                       continue;
                if (cpumask_test_cpu(dest_cpu, tsk_cpus_allowed(p)))
                        return dest_cpu;
+       }
+
+       for (;;) {
+               /* Any allowed, online CPU? */
+               for_each_cpu(dest_cpu, tsk_cpus_allowed(p)) {
+                       if (!cpu_online(dest_cpu))
+                               continue;
+                       if (!cpu_active(dest_cpu))
+                               continue;
+                       goto out;
+               }
 
-       /* Any allowed, online CPU? */
-       dest_cpu = cpumask_any_and(tsk_cpus_allowed(p), cpu_active_mask);
-       if (dest_cpu < nr_cpu_ids)
-               return dest_cpu;
+               switch (state) {
+               case cpuset:
+                       /* No more Mr. Nice Guy. */
+                       cpuset_cpus_allowed_fallback(p);
+                       state = possible;
+                       break;
 
-       /* No more Mr. Nice Guy. */
-       dest_cpu = cpuset_cpus_allowed_fallback(p);
-       /*
-        * Don't tell them about moving exiting tasks or
-        * kernel threads (both mm NULL), since they never
-        * leave kernel.
-        */
-       if (p->mm && printk_ratelimit()) {
-               printk_sched("process %d (%s) no longer affine to cpu%d\n",
-                               task_pid_nr(p), p->comm, cpu);
+               case possible:
+                       do_set_cpus_allowed(p, cpu_possible_mask);
+                       state = fail;
+                       break;
+
+               case fail:
+                       BUG();
+                       break;
+               }
+       }
+
+out:
+       if (state != cpuset) {
+               /*
+                * Don't tell them about moving exiting tasks or
+                * kernel threads (both mm NULL), since they never
+                * leave kernel.
+                */
+               if (p->mm && printk_ratelimit()) {
+                       printk_sched("process %d (%s) no longer affine to cpu%d\n",
+                                       task_pid_nr(p), p->comm, cpu);
+               }
        }
 
        return dest_cpu;
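
The rewritten select_fallback_rq() above replaces two one-shot fallbacks with a retry loop driven by a small state machine: look for an allowed, online CPU, and each time the search comes up empty, widen the allowed set (first via the cpuset fallback, then to cpu_possible_mask) and try again; running out of fallbacks is a kernel bug. A minimal userspace sketch of that widening-fallback pattern, with made-up masks and helpers standing in for the cpumask and cpuset calls:

    #include <stdio.h>
    #include <stdlib.h>

    enum fb_state { FB_CPUSET, FB_POSSIBLE, FB_FAIL };

    static int allowed[4];                  /* CPUs the task may use */
    static int online[4] = { 0, 0, 1, 1 }; /* CPUs that are online */

    static int pick_allowed_online(void)
    {
            for (int i = 0; i < 4; i++)
                    if (allowed[i] && online[i])
                            return i;
            return -1;
    }

    static void widen_to_cpuset(void)   { allowed[1] = 1; } /* still offline */
    static void widen_to_possible(void)
    {
            for (int i = 0; i < 4; i++)
                    allowed[i] = 1;
    }

    static int select_fallback(void)
    {
            enum fb_state state = FB_CPUSET;
            int cpu;

            for (;;) {
                    cpu = pick_allowed_online();
                    if (cpu >= 0)
                            return cpu;     /* found one at this level */

                    switch (state) {        /* widen the mask and retry */
                    case FB_CPUSET:
                            widen_to_cpuset();
                            state = FB_POSSIBLE;
                            break;
                    case FB_POSSIBLE:
                            widen_to_possible();
                            state = FB_FAIL;
                            break;
                    case FB_FAIL:
                            abort();        /* no possible CPU: a bug */
                    }
            }
    }

    int main(void)
    {
            printf("fell back to cpu%d\n", select_fallback()); /* cpu2 */
            return 0;
    }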
@@ -1933,6 +1964,7 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
        local_irq_enable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
        finish_lock_switch(rq, prev);
+       finish_arch_post_lock_switch();
 
        fire_sched_in_preempt_notifiers(current);
        if (mm)
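
finish_arch_post_lock_switch() is a new per-architecture callback, pulled in through the <asm/switch_to.h> include added above; architectures that define nothing are expected to get a no-op default from the scheduler headers. A sketch of that overridable-hook idiom (the empty default shown here is an assumption about the header layout, not copied from 3.4):

    #include <stdio.h>

    /* An architecture that needs work after the runqueue lock is
     * dropped defines the hook in its headers before this point;
     * everyone else compiles the call away to nothing. */
    #ifndef finish_arch_post_lock_switch
    # define finish_arch_post_lock_switch() do { } while (0)
    #endif

    static void finish_switch_sketch(void)
    {
            /* ... release the runqueue lock ... */
            finish_arch_post_lock_switch(); /* no-op unless overridden */
    }

    int main(void)
    {
            finish_switch_sketch();
            puts("context switch finished");
            return 0;
    }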
@@ -2624,6 +2656,48 @@ static inline void task_group_account_field(struct task_struct *p, int index,
 }
 
 
+#if !defined(CONFIG_XEN) || defined(CONFIG_VIRT_CPU_ACCOUNTING)
+# define cputime_to_u64(t) ((__force u64)(t))
+#else
+# include <linux/syscore_ops.h>
+# define NS_PER_TICK (1000000000 / HZ)
+
+static DEFINE_PER_CPU(u64, steal_snapshot);
+static DEFINE_PER_CPU(unsigned int, steal_residual);
+
+static u64 cputime_to_u64(cputime_t t)
+{
+       u64 s = this_vcpu_read(runstate.time[RUNSTATE_runnable]);
+       unsigned long adj = div_u64_rem(s - __this_cpu_read(steal_snapshot)
+                                         + __this_cpu_read(steal_residual),
+                                       NS_PER_TICK,
+                                       &__get_cpu_var(steal_residual));
+
+       __this_cpu_write(steal_snapshot, s);
+       if (t < jiffies_to_cputime(adj))
+               return 0;
+
+       return (__force u64)(t - jiffies_to_cputime(adj));
+}
+
+static void steal_resume(void)
+{
+       cputime_to_u64(((cputime_t)1 << (BITS_PER_LONG * sizeof(cputime_t)
+                                        / sizeof(long) - 1)) - 1);
+}
+
+static struct syscore_ops steal_syscore_ops = {
+       .resume = steal_resume,
+};
+
+static int __init steal_register(void)
+{
+       register_syscore_ops(&steal_syscore_ops);
+       return 0;
+}
+core_initcall(steal_register);
+#endif
+
 /*
  * Account user cpu time to a process.
  * @p: the process that the cpu time gets accounted to
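
The new Xen branch converts a cputime value to u64 only after subtracting time the hypervisor stole from the VCPU: it diffs the cumulative RUNSTATE_runnable counter against a per-CPU snapshot, divides by NS_PER_TICK, and parks the sub-tick remainder in steal_residual so no stolen nanoseconds are lost between calls. A userspace sketch of that snapshot-plus-residual accounting (names and the HZ value are illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define NS_PER_TICK (1000000000 / 250)  /* assumes HZ == 250 */

    static uint64_t steal_snapshot;         /* last cumulative value seen */
    static uint32_t steal_residual;         /* ns short of a whole tick */

    /* Turn a cumulative stolen-time counter into whole ticks,
     * carrying the sub-tick remainder over to the next call. */
    static unsigned long steal_ticks(uint64_t cumulative_ns)
    {
            uint64_t delta = cumulative_ns - steal_snapshot + steal_residual;

            steal_snapshot = cumulative_ns;
            steal_residual = (uint32_t)(delta % NS_PER_TICK);
            return (unsigned long)(delta / NS_PER_TICK);
    }

    int main(void)
    {
            /* Three 3 ms steal increments at 4 ms per tick: the carried
             * residue makes them account as 0, 1 and 1 ticks, so the
             * full 9 ms is preserved up to the 1 ms left in
             * steal_residual rather than rounding away to 0+0+0. */
            unsigned long a = steal_ticks(3000000);
            unsigned long b = steal_ticks(6000000);
            unsigned long c = steal_ticks(9000000);

            printf("%lu %lu %lu\n", a, b, c);   /* prints: 0 1 1 */
            return 0;
    }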
@@ -2643,7 +2717,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
        index = (TASK_NICE(p) > 0) ? CPUTIME_NICE : CPUTIME_USER;
 
        /* Add user time to cpustat. */
-       task_group_account_field(p, index, (__force u64) cputime);
+       task_group_account_field(p, index, cputime_to_u64(cputime));
 
        /* Account for user time used */
        acct_update_integrals(p);
@@ -2693,7 +2767,7 @@ void __account_system_time(struct task_struct *p, cputime_t cputime,
        account_group_system_time(p, cputime);
 
        /* Add system time to cpustat. */
-       task_group_account_field(p, index, (__force u64) cputime);
+       task_group_account_field(p, index, cputime_to_u64(cputime));
 
        /* Account for system time used */
        acct_update_integrals(p);
@@ -2747,9 +2821,9 @@ void account_idle_time(cputime_t cputime)
        struct rq *rq = this_rq();
 
        if (atomic_read(&rq->nr_iowait) > 0)
-               cpustat[CPUTIME_IOWAIT] += (__force u64) cputime;
+               cpustat[CPUTIME_IOWAIT] += cputime_to_u64(cputime);
        else
-               cpustat[CPUTIME_IDLE] += (__force u64) cputime;
+               cpustat[CPUTIME_IDLE] += cputime_to_u64(cputime);
 }
 
 static __always_inline bool steal_account_process_tick(void)
@@ -2805,9 +2879,9 @@ static void irqtime_account_process_tick(struct task_struct *p, int user_tick,
                return;
 
        if (irqtime_account_hi_update()) {
-               cpustat[CPUTIME_IRQ] += (__force u64) cputime_one_jiffy;
+               cpustat[CPUTIME_IRQ] += cputime_to_u64(cputime_one_jiffy);
        } else if (irqtime_account_si_update()) {
-               cpustat[CPUTIME_SOFTIRQ] += (__force u64) cputime_one_jiffy;
+               cpustat[CPUTIME_SOFTIRQ] += cputime_to_u64(cputime_one_jiffy);
        } else if (this_cpu_ksoftirqd() == p) {
                /*
                 * ksoftirqd time do not get accounted in cpu_softirq_time.
@@ -3070,8 +3144,6 @@ EXPORT_SYMBOL(sub_preempt_count);
  */
 static noinline void __schedule_bug(struct task_struct *prev)
 {
-       struct pt_regs *regs = get_irq_regs();
-
        if (oops_in_progress)
                return;
 
@@ -3082,11 +3154,7 @@ static noinline void __schedule_bug(struct task_struct *prev)
        print_modules();
        if (irqs_disabled())
                print_irqtrace_events(prev);
-
-       if (regs)
-               show_regs(regs);
-       else
-               dump_stack();
+       dump_stack();
 }
 
 /*
@@ -3259,6 +3327,11 @@ void __sched schedule_preempt_disabled(void)
 }
 
 #ifdef CONFIG_MUTEX_SPIN_ON_OWNER
+#include <asm/mutex.h>
+
+#ifndef arch_cpu_is_running
+#define arch_cpu_is_running(cpu) true
+#endif
 
 static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
 {
@@ -3273,7 +3346,8 @@ static inline bool owner_running(struct mutex *lock, struct task_struct *owner)
         */
        barrier();
 
-       return owner->on_cpu;
+       return owner->on_cpu
+              && arch_cpu_is_running(task_thread_info(owner)->cpu);
 }
 
 /*
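
arch_cpu_is_running() gives paravirtualized architectures a veto over mutex owner-spinning: if the owner's VCPU is not actually executing, busy-waiting on owner->on_cpu can only burn cycles, so the spinner should give up and block instead. Generic code keeps the old behaviour through the always-true default defined above. A compilable sketch of the pattern (struct and helper names invented for the example):

    #include <stdatomic.h>
    #include <stdbool.h>

    /* Per-arch predicate with a generic always-true default: only an
     * architecture that can tell a descheduled VCPU from a running
     * one needs to provide a real implementation. */
    #ifndef arch_cpu_is_running
    # define arch_cpu_is_running(cpu) true
    #endif

    struct owner {
            atomic_bool on_cpu;     /* owner currently on a CPU? */
            int cpu;                /* which CPU it last ran on */
    };

    /* Spin only while the lock owner can actually make progress. */
    static bool keep_spinning(const struct owner *o)
    {
            return atomic_load(&o->on_cpu) && arch_cpu_is_running(o->cpu);
    }

    int main(void)
    {
            struct owner o = { .on_cpu = true, .cpu = 0 };

            return keep_spinning(&o) ? 0 : 1;
    }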
@@ -6356,6 +6430,8 @@ static int __sdt_alloc(const struct cpumask *cpu_map)
                        if (!sg)
                                return -ENOMEM;
 
+                       sg->next = sg;
+
                        *per_cpu_ptr(sdd->sg, j) = sg;
 
                        sgp = kzalloc_node(sizeof(struct sched_group_power),
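
Pointing a freshly allocated sched_group's next field at itself makes it a valid one-element circular list, so do/while walkers work before the group is linked to any siblings. The idiom in miniature (generic names, nothing scheduler-specific):

    #include <stdio.h>
    #include <stdlib.h>

    struct ring {
            int id;
            struct ring *next;
    };

    static struct ring *ring_alloc(int id)
    {
            struct ring *r = calloc(1, sizeof(*r));

            if (!r)
                    return NULL;
            r->id = id;
            r->next = r;            /* singleton ring: always walkable */
            return r;
    }

    static void ring_walk(struct ring *first)
    {
            struct ring *r = first;

            do {                    /* do/while visits a singleton once */
                    printf("%d\n", r->id);
                    r = r->next;
            } while (r != first);
    }

    int main(void)
    {
            struct ring *r = ring_alloc(7);

            if (r)
                    ring_walk(r);
            free(r);
            return 0;
    }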
@@ -6379,16 +6455,26 @@ static void __sdt_free(const struct cpumask *cpu_map)
                struct sd_data *sdd = &tl->data;
 
                for_each_cpu(j, cpu_map) {
-                       struct sched_domain *sd = *per_cpu_ptr(sdd->sd, j);
-                       if (sd && (sd->flags & SD_OVERLAP))
-                               free_sched_groups(sd->groups, 0);
-                       kfree(*per_cpu_ptr(sdd->sd, j));
-                       kfree(*per_cpu_ptr(sdd->sg, j));
-                       kfree(*per_cpu_ptr(sdd->sgp, j));
+                       struct sched_domain *sd;
+
+                       if (sdd->sd) {
+                               sd = *per_cpu_ptr(sdd->sd, j);
+                               if (sd && (sd->flags & SD_OVERLAP))
+                                       free_sched_groups(sd->groups, 0);
+                               kfree(*per_cpu_ptr(sdd->sd, j));
+                       }
+
+                       if (sdd->sg)
+                               kfree(*per_cpu_ptr(sdd->sg, j));
+                       if (sdd->sgp)
+                               kfree(*per_cpu_ptr(sdd->sgp, j));
                }
                free_percpu(sdd->sd);
+               sdd->sd = NULL;
                free_percpu(sdd->sg);
+               sdd->sg = NULL;
                free_percpu(sdd->sgp);
+               sdd->sgp = NULL;
        }
 }
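
__sdt_free() now guards each per-CPU array behind a NULL check and clears sdd->sd/sg/sgp after freeing them, so the teardown is safe on a partially built sd_data (e.g. after an allocation failure mid-way through __sdt_alloc()) and harmless if it runs twice. A small sketch of that free-and-NULL pattern:

    #include <stdlib.h>

    struct sd_data_sketch {
            int **sd;       /* stand-ins for the per-CPU pointer arrays */
            int **sg;
    };

    /* Free-and-NULL: safe on a partially initialized structure and
     * safe to call a second time, since freed arrays are cleared. */
    static void sdt_free_sketch(struct sd_data_sketch *d, int ncpus)
    {
            if (d->sd) {
                    for (int i = 0; i < ncpus; i++)
                            free(d->sd[i]);
                    free(d->sd);
                    d->sd = NULL;
            }
            if (d->sg) {
                    for (int i = 0; i < ncpus; i++)
                            free(d->sg[i]);
                    free(d->sg);
                    d->sg = NULL;
            }
    }

    int main(void)
    {
            struct sd_data_sketch d = { 0 };

            sdt_free_sketch(&d, 4); /* nothing allocated: still safe */
            sdt_free_sketch(&d, 4); /* second call: also safe */
            return 0;
    }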