UBUNTU: Ubuntu-2.6.38-12.51

[linux-flexiantxendom0-natty.git] / kernel / sched_rt.c
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c

index d10c80e..01f75a5 100644 (file)
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -183,6 +183,17 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
         return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
  }
  
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_add_rcu(&rt_rq->leaf_rt_rq_list,
+                       &rq_of_rt_rq(rt_rq)->leaf_rt_rq_list);
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+       list_del_rcu(&rt_rq->leaf_rt_rq_list);
+}
+
  #define for_each_leaf_rt_rq(rt_rq, rq) \
         list_for_each_entry_rcu(rt_rq, &rq->leaf_rt_rq_list, leaf_rt_rq_list)
  
@@ -199,11 +210,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
  
  static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  {
-       int this_cpu = smp_processor_id();
         struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr;
         struct sched_rt_entity *rt_se;
  
-       rt_se = rt_rq->tg->rt_se[this_cpu];
+       int cpu = cpu_of(rq_of_rt_rq(rt_rq));
+
+       rt_se = rt_rq->tg->rt_se[cpu];
  
         if (rt_rq->rt_nr_running) {
                 if (rt_se && !on_rt_rq(rt_se))
@@ -215,10 +227,10 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
  
  static void sched_rt_rq_dequeue(struct rt_rq *rt_rq)
  {
-       int this_cpu = smp_processor_id();
         struct sched_rt_entity *rt_se;
+       int cpu = cpu_of(rq_of_rt_rq(rt_rq));
  
-       rt_se = rt_rq->tg->rt_se[this_cpu];
+       rt_se = rt_rq->tg->rt_se[cpu];
  
         if (rt_se && on_rt_rq(rt_se))
                 dequeue_rt_entity(rt_se);
@@ -276,6 +288,14 @@ static inline u64 sched_rt_period(struct rt_rq *rt_rq)
         return ktime_to_ns(def_rt_bandwidth.rt_period);
  }
  
+static inline void list_add_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
+static inline void list_del_leaf_rt_rq(struct rt_rq *rt_rq)
+{
+}
+
  #define for_each_leaf_rt_rq(rt_rq, rq) \
         for (rt_rq = &rq->rt; rt_rq; rt_rq = NULL)
  
@@ -546,8 +566,11 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                         if (rt_rq->rt_time || rt_rq->rt_nr_running)
                                 idle = 0;
                         raw_spin_unlock(&rt_rq->rt_runtime_lock);
-               } else if (rt_rq->rt_nr_running)
+               } else if (rt_rq->rt_nr_running) {
                         idle = 0;
+                       if (!rt_rq_throttled(rt_rq))
+                               enqueue = 1;
+               }
  
                 if (enqueue)
                         sched_rt_rq_enqueue(rt_rq);
@@ -606,10 +629,10 @@ static void update_curr_rt(struct rq *rq)
         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
         u64 delta_exec;
  
-       if (!task_has_rt_policy(curr))
+       if (curr->sched_class != &rt_sched_class)
                 return;
  
-       delta_exec = rq->clock - curr->se.exec_start;
+       delta_exec = rq->clock_task - curr->se.exec_start;
         if (unlikely((s64)delta_exec < 0))
                 delta_exec = 0;
  
@@ -618,7 +641,7 @@ static void update_curr_rt(struct rq *rq)
         curr->se.sum_exec_runtime += delta_exec;
         account_group_exec_runtime(curr, delta_exec);
  
-       curr->se.exec_start = rq->clock;
+       curr->se.exec_start = rq->clock_task;
         cpuacct_charge(curr, delta_exec);
  
         sched_rt_avg_update(rq, delta_exec);
@@ -825,6 +848,9 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                 return;
  
+       if (!rt_rq->rt_nr_running)
+               list_add_leaf_rt_rq(rt_rq);
+
         if (head)
                 list_add(&rt_se->run_list, queue);
         else
@@ -844,6 +870,8 @@ static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
                 __clear_bit(rt_se_prio(rt_se), array->bitmap);
  
         dec_rt_tasks(rt_se, rt_rq);
+       if (!rt_rq->rt_nr_running)
+               list_del_leaf_rt_rq(rt_rq);
  }
  
  /*
@@ -960,18 +988,19 @@ select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
          * runqueue. Otherwise simply start this RT task
          * on its current runqueue.
          *
-        * We want to avoid overloading runqueues. Even if
-        * the RT task is of higher priority than the current RT task.
-        * RT tasks behave differently than other tasks. If
-        * one gets preempted, we try to push it off to another queue.
-        * So trying to keep a preempting RT task on the same
-        * cache hot CPU will force the running RT task to
-        * a cold CPU. So we waste all the cache for the lower
-        * RT task in hopes of saving some of a RT task
-        * that is just being woken and probably will have
-        * cold cache anyway.
+        * We want to avoid overloading runqueues. If the woken
+        * task has a higher priority, then it will stay on this CPU
+        * and the lower prio task should be moved to another CPU.
+        * Even though this will probably make the lower prio task
+        * lose its cache, we do not want to bounce a higher prio
+        * task around just because it gave up its CPU, perhaps
+        * to wait for a lock.
+        *
+        * For equal prio tasks, we just let the scheduler sort it out.
          */
         if (unlikely(rt_task(rq->curr)) &&
+           (rq->curr->rt.nr_cpus_allowed < 2 ||
+            rq->curr->prio < p->prio) &&
             (p->rt.nr_cpus_allowed > 1)) {
                 int cpu = find_lowest_rq(p);
  
@@ -1074,7 +1103,7 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
         } while (rt_rq);
  
         p = rt_task_of(rt_se);
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         return p;
  }
@@ -1139,7 +1168,7 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
         for_each_leaf_rt_rq(rt_rq, rq) {
                 array = &rt_rq->active;
                 idx = sched_find_first_bit(array->bitmap);
- next_idx:
+next_idx:
                 if (idx >= MAX_RT_PRIO)
                         continue;
                 if (next && next->prio < idx)
@@ -1315,7 +1344,7 @@ static int push_rt_task(struct rq *rq)
         if (!next_task)
                 return 0;
  
- retry:
+retry:
         if (unlikely(next_task == rq->curr)) {
                 WARN_ON(1);
                 return 0;
@@ -1463,7 +1492,7 @@ static int pull_rt_task(struct rq *this_rq)
                          * but possible)
                          */
                 }
- skip:
+skip:
                 double_unlock_balance(this_rq, src_rq);
         }
  
@@ -1491,7 +1520,10 @@ static void task_woken_rt(struct rq *rq, struct task_struct *p)
         if (!task_running(rq, p) &&
             !test_tsk_need_resched(rq->curr) &&
             has_pushable_tasks(rq) &&
-           p->rt.nr_cpus_allowed > 1)
+           p->rt.nr_cpus_allowed > 1 &&
+           rt_task(rq->curr) &&
+           (rq->curr->rt.nr_cpus_allowed < 2 ||
+            rq->curr->prio < p->prio))
                 push_rt_tasks(rq);
  }
  
@@ -1709,7 +1741,7 @@ static void set_curr_task_rt(struct rq *rq)
  {
         struct task_struct *p = rq->curr;
  
-       p->se.exec_start = rq->clock;
+       p->se.exec_start = rq->clock_task;
  
         /* The running task is never eligible for pushing */
         dequeue_pushable_task(rq, p);